diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-06 11:11:57 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-06 11:11:57 -0400 |
| commit | 18a1a7a1d862ae0794a0179473d08a414dd49234 (patch) | |
| tree | 013ffe8b7fbbe9169801d0be1a780ee9bf53c08e | |
| parent | 04535d273ee3edacd9551b2512b4e939ba20277f (diff) | |
| parent | 5eb0bdf84433eb7b7ad4ba92a80aac57ad4b46ea (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile

Pull arch/tile updates from Chris Metcalf:

> "These fix a few stray build issues seen in linux-next, and also add
> the minimal required support for perf to tilegx"

* git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile:
  - arch/tile: remove unused variable 'devcap'
  - tile: Fix vDSO compilation issue with allyesconfig
  - perf tools: Allow building for tile
  - tile/perf: Support perf_events on tilegx and tilepro
  - tile: Enable NMIs on return from handle_nmi() without errors
  - tile: Add support for handling PMC hardware
  - tile: don't use __get_cpu_var() with structure-typed arguments
  - tile: avoid overflow in ns2cycles

| -rw-r--r-- | arch/tile/Kconfig | 6 | ||||
| -rw-r--r-- | arch/tile/include/asm/perf_event.h | 22 | ||||
| -rw-r--r-- | arch/tile/include/asm/pmc.h | 64 | ||||
| -rw-r--r-- | arch/tile/kernel/Makefile | 2 | ||||
| -rw-r--r-- | arch/tile/kernel/intvec_32.S | 24 | ||||
| -rw-r--r-- | arch/tile/kernel/intvec_64.S | 24 | ||||
| -rw-r--r-- | arch/tile/kernel/irq.c | 18 | ||||
| -rw-r--r-- | arch/tile/kernel/messaging.c | 4 | ||||
| -rw-r--r-- | arch/tile/kernel/pci.c | 2 | ||||
| -rw-r--r-- | arch/tile/kernel/perf_event.c | 1005 | ||||
| -rw-r--r-- | arch/tile/kernel/pmc.c | 121 | ||||
| -rw-r--r-- | arch/tile/kernel/time.c | 10 | ||||
| -rw-r--r-- | arch/tile/kernel/vdso/Makefile | 2 | ||||
| -rw-r--r-- | tools/perf/config/Makefile.arch | 3 | ||||
| -rw-r--r-- | tools/perf/perf.h | 8 |
15 files changed, 1295 insertions, 20 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index b3692ce78f90..31c8c6223995 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | config TILE | 4 | config TILE |
| 5 | def_bool y | 5 | def_bool y |
| 6 | select HAVE_PERF_EVENTS | ||
| 7 | select USE_PMC if PERF_EVENTS | ||
| 6 | select HAVE_DMA_ATTRS | 8 | select HAVE_DMA_ATTRS |
| 7 | select HAVE_DMA_API_DEBUG | 9 | select HAVE_DMA_API_DEBUG |
| 8 | select HAVE_KVM if !TILEGX | 10 | select HAVE_KVM if !TILEGX |
| @@ -66,6 +68,10 @@ config HUGETLB_SUPER_PAGES | |||
| 66 | config GENERIC_TIME_VSYSCALL | 68 | config GENERIC_TIME_VSYSCALL |
| 67 | def_bool y | 69 | def_bool y |
| 68 | 70 | ||
| 71 | # Enable PMC if PERF_EVENTS, OPROFILE, or WATCHPOINTS are enabled. | ||
| 72 | config USE_PMC | ||
| 73 | bool | ||
| 74 | |||
| 69 | # FIXME: tilegx can implement a more efficient rwsem. | 75 | # FIXME: tilegx can implement a more efficient rwsem. |
| 70 | config RWSEM_GENERIC_SPINLOCK | 76 | config RWSEM_GENERIC_SPINLOCK |
| 71 | def_bool y | 77 | def_bool y |
diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h new file mode 100644 index 000000000000..59c5b164e5b6 --- /dev/null +++ b/arch/tile/include/asm/perf_event.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation, version 2. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, but | ||
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef _ASM_TILE_PERF_EVENT_H | ||
| 16 | #define _ASM_TILE_PERF_EVENT_H | ||
| 17 | |||
| 18 | #include <linux/percpu.h> | ||
| 19 | DECLARE_PER_CPU(u64, perf_irqs); | ||
| 20 | |||
| 21 | unsigned long handle_syscall_link_address(void); | ||
| 22 | #endif /* _ASM_TILE_PERF_EVENT_H */ | ||
diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h new file mode 100644 index 000000000000..7ae3956d9008 --- /dev/null +++ b/arch/tile/include/asm/pmc.h | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation, version 2. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, but | ||
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef _ASM_TILE_PMC_H | ||
| 16 | #define _ASM_TILE_PMC_H | ||
| 17 | |||
| 18 | #include <linux/ptrace.h> | ||
| 19 | |||
| 20 | #define TILE_BASE_COUNTERS 2 | ||
| 21 | |||
| 22 | /* Bitfields below are derived from SPR PERF_COUNT_CTL*/ | ||
| 23 | #ifndef __tilegx__ | ||
| 24 | /* PERF_COUNT_CTL on TILEPro */ | ||
| 25 | #define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */ | ||
| 26 | #define TILE_CTL_EXCL_KERNEL (1 << 8) /* exclude kernel level */ | ||
| 27 | #define TILE_CTL_EXCL_HV (1 << 9) /* exclude hypervisor level */ | ||
| 28 | |||
| 29 | #define TILE_SEL_MASK 0x7f /* 7 bits for event SEL, | ||
| 30 | COUNT_0_SEL */ | ||
| 31 | #define TILE_PLM_MASK 0x780 /* 4 bits priv level msks, | ||
| 32 | COUNT_0_MASK*/ | ||
| 33 | #define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_PLM_MASK) | ||
| 34 | |||
| 35 | #else /* __tilegx__*/ | ||
| 36 | /* PERF_COUNT_CTL on TILEGx*/ | ||
| 37 | #define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */ | ||
| 38 | #define TILE_CTL_EXCL_KERNEL (1 << 11) /* exclude kernel level */ | ||
| 39 | #define TILE_CTL_EXCL_HV (1 << 12) /* exclude hypervisor level */ | ||
| 40 | |||
| 41 | #define TILE_SEL_MASK 0x3f /* 6 bits for event SEL, | ||
| 42 | COUNT_0_SEL*/ | ||
| 43 | #define TILE_BOX_MASK 0x1c0 /* 3 bits box msks, | ||
| 44 | COUNT_0_BOX */ | ||
| 45 | #define TILE_PLM_MASK 0x3c00 /* 4 bits priv level msks, | ||
| 46 | COUNT_0_MASK */ | ||
| 47 | #define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK) | ||
| 48 | #endif /* __tilegx__*/ | ||
| 49 | |||
| 50 | /* Takes registers and fault number. Returns an error to disable the interrupt. */ | ||
| 51 | typedef int (*perf_irq_t)(struct pt_regs *, int); | ||
| 52 | |||
| 53 | int userspace_perf_handler(struct pt_regs *regs, int fault); | ||
| 54 | |||
| 55 | perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq); | ||
| 56 | void release_pmc_hardware(void); | ||
| 57 | |||
| 58 | unsigned long pmc_get_overflow(void); | ||
| 59 | void pmc_ack_overflow(unsigned long status); | ||
| 60 | |||
| 61 | void unmask_pmc_interrupts(void); | ||
| 62 | void mask_pmc_interrupts(void); | ||
| 63 | |||
| 64 | #endif /* _ASM_TILE_PMC_H */ | ||
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 27a2bf39dae8..21f77bf68c69 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile | |||
| @@ -25,6 +25,8 @@ obj-$(CONFIG_PCI) += pci_gx.o | |||
| 25 | else | 25 | else |
| 26 | obj-$(CONFIG_PCI) += pci.o | 26 | obj-$(CONFIG_PCI) += pci.o |
| 27 | endif | 27 | endif |
| 28 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | ||
| 29 | obj-$(CONFIG_USE_PMC) += pmc.o | ||
| 28 | obj-$(CONFIG_TILE_USB) += usb.o | 30 | obj-$(CONFIG_TILE_USB) += usb.o |
| 29 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o | 31 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o |
| 30 | obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o | 32 | obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o |
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 2cbe6d5dd6b0..cdbda45a4e4b 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S | |||
| @@ -313,13 +313,13 @@ intvec_\vecname: | |||
| 313 | movei r3, 0 | 313 | movei r3, 0 |
| 314 | } | 314 | } |
| 315 | .else | 315 | .else |
| 316 | .ifc \c_routine, op_handle_perf_interrupt | 316 | .ifc \c_routine, handle_perf_interrupt |
| 317 | { | 317 | { |
| 318 | mfspr r2, PERF_COUNT_STS | 318 | mfspr r2, PERF_COUNT_STS |
| 319 | movei r3, -1 /* not used, but set for consistency */ | 319 | movei r3, -1 /* not used, but set for consistency */ |
| 320 | } | 320 | } |
| 321 | .else | 321 | .else |
| 322 | .ifc \c_routine, op_handle_aux_perf_interrupt | 322 | .ifc \c_routine, handle_perf_interrupt |
| 323 | { | 323 | { |
| 324 | mfspr r2, AUX_PERF_COUNT_STS | 324 | mfspr r2, AUX_PERF_COUNT_STS |
| 325 | movei r3, -1 /* not used, but set for consistency */ | 325 | movei r3, -1 /* not used, but set for consistency */ |
| @@ -946,6 +946,13 @@ STD_ENTRY(interrupt_return) | |||
| 946 | bzt r30, .Lrestore_regs | 946 | bzt r30, .Lrestore_regs |
| 947 | 3: | 947 | 3: |
| 948 | 948 | ||
| 949 | /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */ | ||
| 950 | { | ||
| 951 | moveli r0, lo16(1 << (INT_PERF_COUNT - 32)) | ||
| 952 | bz r31, .Lrestore_regs | ||
| 953 | } | ||
| 954 | auli r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32)) | ||
| 955 | mtspr SPR_INTERRUPT_MASK_RESET_K_1, r0 | ||
| 949 | 956 | ||
| 950 | /* | 957 | /* |
| 951 | * We now commit to returning from this interrupt, since we will be | 958 | * We now commit to returning from this interrupt, since we will be |
| @@ -1171,6 +1178,10 @@ handle_nmi: | |||
| 1171 | PTREGS_PTR(r0, PTREGS_OFFSET_BASE) | 1178 | PTREGS_PTR(r0, PTREGS_OFFSET_BASE) |
| 1172 | } | 1179 | } |
| 1173 | FEEDBACK_REENTER(handle_nmi) | 1180 | FEEDBACK_REENTER(handle_nmi) |
| 1181 | { | ||
| 1182 | movei r30, 1 | ||
| 1183 | seq r31, r0, zero | ||
| 1184 | } | ||
| 1174 | j interrupt_return | 1185 | j interrupt_return |
| 1175 | STD_ENDPROC(handle_nmi) | 1186 | STD_ENDPROC(handle_nmi) |
| 1176 | 1187 | ||
| @@ -1835,8 +1846,9 @@ int_unalign: | |||
| 1835 | /* Include .intrpt array of interrupt vectors */ | 1846 | /* Include .intrpt array of interrupt vectors */ |
| 1836 | .section ".intrpt", "ax" | 1847 | .section ".intrpt", "ax" |
| 1837 | 1848 | ||
| 1838 | #define op_handle_perf_interrupt bad_intr | 1849 | #ifndef CONFIG_USE_PMC |
| 1839 | #define op_handle_aux_perf_interrupt bad_intr | 1850 | #define handle_perf_interrupt bad_intr |
| 1851 | #endif | ||
| 1840 | 1852 | ||
| 1841 | #ifndef CONFIG_HARDWALL | 1853 | #ifndef CONFIG_HARDWALL |
| 1842 | #define do_hardwall_trap bad_intr | 1854 | #define do_hardwall_trap bad_intr |
| @@ -1877,7 +1889,7 @@ int_unalign: | |||
| 1877 | int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr | 1889 | int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr |
| 1878 | int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr | 1890 | int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr |
| 1879 | int_hand INT_PERF_COUNT, PERF_COUNT, \ | 1891 | int_hand INT_PERF_COUNT, PERF_COUNT, \ |
| 1880 | op_handle_perf_interrupt, handle_nmi | 1892 | handle_perf_interrupt, handle_nmi |
| 1881 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr | 1893 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr |
| 1882 | #if CONFIG_KERNEL_PL == 2 | 1894 | #if CONFIG_KERNEL_PL == 2 |
| 1883 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 | 1895 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 |
| @@ -1902,7 +1914,7 @@ int_unalign: | |||
| 1902 | int_hand INT_SN_CPL, SN_CPL, bad_intr | 1914 | int_hand INT_SN_CPL, SN_CPL, bad_intr |
| 1903 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap | 1915 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap |
| 1904 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ | 1916 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ |
| 1905 | op_handle_aux_perf_interrupt, handle_nmi | 1917 | handle_perf_interrupt, handle_nmi |
| 1906 | 1918 | ||
| 1907 | /* Synthetic interrupt delivered only by the simulator */ | 1919 | /* Synthetic interrupt delivered only by the simulator */ |
| 1908 | int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint | 1920 | int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint |
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index b8fc497f2437..5b67efcecabd 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S | |||
| @@ -509,10 +509,10 @@ intvec_\vecname: | |||
| 509 | .ifc \c_routine, do_trap | 509 | .ifc \c_routine, do_trap |
| 510 | mfspr r2, GPV_REASON | 510 | mfspr r2, GPV_REASON |
| 511 | .else | 511 | .else |
| 512 | .ifc \c_routine, op_handle_perf_interrupt | 512 | .ifc \c_routine, handle_perf_interrupt |
| 513 | mfspr r2, PERF_COUNT_STS | 513 | mfspr r2, PERF_COUNT_STS |
| 514 | .else | 514 | .else |
| 515 | .ifc \c_routine, op_handle_aux_perf_interrupt | 515 | .ifc \c_routine, handle_perf_interrupt |
| 516 | mfspr r2, AUX_PERF_COUNT_STS | 516 | mfspr r2, AUX_PERF_COUNT_STS |
| 517 | .endif | 517 | .endif |
| 518 | .endif | 518 | .endif |
| @@ -971,6 +971,15 @@ STD_ENTRY(interrupt_return) | |||
| 971 | beqzt r30, .Lrestore_regs | 971 | beqzt r30, .Lrestore_regs |
| 972 | 3: | 972 | 3: |
| 973 | 973 | ||
| 974 | #if INT_PERF_COUNT + 1 != INT_AUX_PERF_COUNT | ||
| 975 | # error Bad interrupt assumption | ||
| 976 | #endif | ||
| 977 | { | ||
| 978 | movei r0, 3 /* two adjacent bits for the PERF_COUNT mask */ | ||
| 979 | beqz r31, .Lrestore_regs | ||
| 980 | } | ||
| 981 | shli r0, r0, INT_PERF_COUNT | ||
| 982 | mtspr SPR_INTERRUPT_MASK_RESET_K, r0 | ||
| 974 | 983 | ||
| 975 | /* | 984 | /* |
| 976 | * We now commit to returning from this interrupt, since we will be | 985 | * We now commit to returning from this interrupt, since we will be |
| @@ -1187,7 +1196,7 @@ handle_nmi: | |||
| 1187 | FEEDBACK_REENTER(handle_nmi) | 1196 | FEEDBACK_REENTER(handle_nmi) |
| 1188 | { | 1197 | { |
| 1189 | movei r30, 1 | 1198 | movei r30, 1 |
| 1190 | move r31, r0 | 1199 | cmpeq r31, r0, zero |
| 1191 | } | 1200 | } |
| 1192 | j interrupt_return | 1201 | j interrupt_return |
| 1193 | STD_ENDPROC(handle_nmi) | 1202 | STD_ENDPROC(handle_nmi) |
| @@ -1491,8 +1500,9 @@ STD_ENTRY(fill_ra_stack) | |||
| 1491 | .global intrpt_start | 1500 | .global intrpt_start |
| 1492 | intrpt_start: | 1501 | intrpt_start: |
| 1493 | 1502 | ||
| 1494 | #define op_handle_perf_interrupt bad_intr | 1503 | #ifndef CONFIG_USE_PMC |
| 1495 | #define op_handle_aux_perf_interrupt bad_intr | 1504 | #define handle_perf_interrupt bad_intr |
| 1505 | #endif | ||
| 1496 | 1506 | ||
| 1497 | #ifndef CONFIG_HARDWALL | 1507 | #ifndef CONFIG_HARDWALL |
| 1498 | #define do_hardwall_trap bad_intr | 1508 | #define do_hardwall_trap bad_intr |
| @@ -1540,9 +1550,9 @@ intrpt_start: | |||
| 1540 | #endif | 1550 | #endif |
| 1541 | int_hand INT_IPI_0, IPI_0, bad_intr | 1551 | int_hand INT_IPI_0, IPI_0, bad_intr |
| 1542 | int_hand INT_PERF_COUNT, PERF_COUNT, \ | 1552 | int_hand INT_PERF_COUNT, PERF_COUNT, \ |
| 1543 | op_handle_perf_interrupt, handle_nmi | 1553 | handle_perf_interrupt, handle_nmi |
| 1544 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ | 1554 | int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ |
| 1545 | op_handle_perf_interrupt, handle_nmi | 1555 | handle_perf_interrupt, handle_nmi |
| 1546 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr | 1556 | int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr |
| 1547 | #if CONFIG_KERNEL_PL == 2 | 1557 | #if CONFIG_KERNEL_PL == 2 |
| 1548 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 | 1558 | dc_dispatch INT_INTCTRL_2, INTCTRL_2 |
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 0586fdb9352d..906a76bdb31d 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <hv/drv_pcie_rc_intf.h> | 21 | #include <hv/drv_pcie_rc_intf.h> |
| 22 | #include <arch/spr_def.h> | 22 | #include <arch/spr_def.h> |
| 23 | #include <asm/traps.h> | 23 | #include <asm/traps.h> |
| 24 | #include <linux/perf_event.h> | ||
| 24 | 25 | ||
| 25 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ | 26 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ |
| 26 | #define IS_HW_CLEARED 1 | 27 | #define IS_HW_CLEARED 1 |
| @@ -261,6 +262,23 @@ void ack_bad_irq(unsigned int irq) | |||
| 261 | } | 262 | } |
| 262 | 263 | ||
| 263 | /* | 264 | /* |
| 265 | * /proc/interrupts printing: | ||
| 266 | */ | ||
| 267 | int arch_show_interrupts(struct seq_file *p, int prec) | ||
| 268 | { | ||
| 269 | #ifdef CONFIG_PERF_EVENTS | ||
| 270 | int i; | ||
| 271 | |||
| 272 | seq_printf(p, "%*s: ", prec, "PMI"); | ||
| 273 | |||
| 274 | for_each_online_cpu(i) | ||
| 275 | seq_printf(p, "%10llu ", per_cpu(perf_irqs, i)); | ||
| 276 | seq_puts(p, " perf_events\n"); | ||
| 277 | #endif | ||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | /* | ||
| 264 | * Generic, controller-independent functions: | 282 | * Generic, controller-independent functions: |
| 265 | */ | 283 | */ |
| 266 | 284 | ||
diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 00331af9525d..7867266f9716 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c | |||
| @@ -68,8 +68,8 @@ void hv_message_intr(struct pt_regs *regs, int intnum) | |||
| 68 | #endif | 68 | #endif |
| 69 | 69 | ||
| 70 | while (1) { | 70 | while (1) { |
| 71 | rmi = hv_receive_message(__get_cpu_var(msg_state), | 71 | HV_MsgState *state = this_cpu_ptr(&msg_state); |
| 72 | (HV_VirtAddr) message, | 72 | rmi = hv_receive_message(*state, (HV_VirtAddr) message, |
| 73 | sizeof(message)); | 73 | sizeof(message)); |
| 74 | if (rmi.msglen == 0) | 74 | if (rmi.msglen == 0) |
| 75 | break; | 75 | break; |
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index c45593db7718..1f80a88c75a6 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c | |||
| @@ -250,8 +250,6 @@ static void fixup_read_and_payload_sizes(void) | |||
| 250 | 250 | ||
| 251 | /* Scan for the smallest maximum payload size. */ | 251 | /* Scan for the smallest maximum payload size. */ |
| 252 | for_each_pci_dev(dev) { | 252 | for_each_pci_dev(dev) { |
| 253 | u32 devcap; | ||
| 254 | |||
| 255 | if (!pci_is_pcie(dev)) | 253 | if (!pci_is_pcie(dev)) |
| 256 | continue; | 254 | continue; |
| 257 | 255 | ||
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c new file mode 100644 index 000000000000..2bf6c9c135c1 --- /dev/null +++ b/arch/tile/kernel/perf_event.c | |||
| @@ -0,0 +1,1005 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation, version 2. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, but | ||
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | * | ||
| 14 | * | ||
| 15 | * Perf_events support for Tile processor. | ||
| 16 | * | ||
| 17 | * This code is based upon the x86 perf event | ||
| 18 | * code, which is: | ||
| 19 | * | ||
| 20 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
| 21 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
| 22 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
| 23 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
| 24 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 25 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
| 26 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <linux/kprobes.h> | ||
| 30 | #include <linux/kernel.h> | ||
| 31 | #include <linux/kdebug.h> | ||
| 32 | #include <linux/mutex.h> | ||
| 33 | #include <linux/bitmap.h> | ||
| 34 | #include <linux/irq.h> | ||
| 35 | #include <linux/interrupt.h> | ||
| 36 | #include <linux/perf_event.h> | ||
| 37 | #include <linux/atomic.h> | ||
| 38 | #include <asm/traps.h> | ||
| 39 | #include <asm/stack.h> | ||
| 40 | #include <asm/pmc.h> | ||
| 41 | #include <hv/hypervisor.h> | ||
| 42 | |||
| 43 | #define TILE_MAX_COUNTERS 4 | ||
| 44 | |||
| 45 | #define PERF_COUNT_0_IDX 0 | ||
| 46 | #define PERF_COUNT_1_IDX 1 | ||
| 47 | #define AUX_PERF_COUNT_0_IDX 2 | ||
| 48 | #define AUX_PERF_COUNT_1_IDX 3 | ||
| 49 | |||
| 50 | struct cpu_hw_events { | ||
| 51 | int n_events; | ||
| 52 | struct perf_event *events[TILE_MAX_COUNTERS]; /* counter order */ | ||
| 53 | struct perf_event *event_list[TILE_MAX_COUNTERS]; /* enabled | ||
| 54 | order */ | ||
| 55 | int assign[TILE_MAX_COUNTERS]; | ||
| 56 | unsigned long active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)]; | ||
| 57 | unsigned long used_mask; | ||
| 58 | }; | ||
| 59 | |||
| 60 | /* TILE arch specific performance monitor unit */ | ||
| 61 | struct tile_pmu { | ||
| 62 | const char *name; | ||
| 63 | int version; | ||
| 64 | const int *hw_events; /* generic hw events table */ | ||
| 65 | /* generic hw cache events table */ | ||
| 66 | const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] | ||
| 67 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 68 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
| 69 | int (*map_hw_event)(u64); /*method used to map | ||
| 70 | hw events */ | ||
| 71 | int (*map_cache_event)(u64); /*method used to map | ||
| 72 | cache events */ | ||
| 73 | |||
| 74 | u64 max_period; /* max sampling period */ | ||
| 75 | u64 cntval_mask; /* counter width mask */ | ||
| 76 | int cntval_bits; /* counter width */ | ||
| 77 | int max_events; /* max generic hw events | ||
| 78 | in map */ | ||
| 79 | int num_counters; /* number base + aux counters */ | ||
| 80 | int num_base_counters; /* number base counters */ | ||
| 81 | }; | ||
| 82 | |||
| 83 | DEFINE_PER_CPU(u64, perf_irqs); | ||
| 84 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
| 85 | |||
| 86 | #define TILE_OP_UNSUPP (-1) | ||
| 87 | |||
| 88 | #ifndef __tilegx__ | ||
| 89 | /* TILEPro hardware events map */ | ||
| 90 | static const int tile_hw_event_map[] = { | ||
| 91 | [PERF_COUNT_HW_CPU_CYCLES] = 0x01, /* ONE */ | ||
| 92 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x06, /* MP_BUNDLE_RETIRED */ | ||
| 93 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
| 94 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
| 95 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x16, /* | ||
| 96 | MP_CONDITIONAL_BRANCH_ISSUED */ | ||
| 97 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x14, /* | ||
| 98 | MP_CONDITIONAL_BRANCH_MISSPREDICT */ | ||
| 99 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
| 100 | }; | ||
| 101 | #else | ||
| 102 | /* TILEGx hardware events map */ | ||
| 103 | static const int tile_hw_event_map[] = { | ||
| 104 | [PERF_COUNT_HW_CPU_CYCLES] = 0x181, /* ONE */ | ||
| 105 | [PERF_COUNT_HW_INSTRUCTIONS] = 0xdb, /* INSTRUCTION_BUNDLE */ | ||
| 106 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
| 107 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
| 108 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xd9, /* | ||
| 109 | COND_BRANCH_PRED_CORRECT */ | ||
| 110 | [PERF_COUNT_HW_BRANCH_MISSES] = 0xda, /* | ||
| 111 | COND_BRANCH_PRED_INCORRECT */ | ||
| 112 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
| 113 | }; | ||
| 114 | #endif | ||
| 115 | |||
| 116 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 117 | |||
| 118 | /* | ||
| 119 | * Generalized hw caching related hw_event table, filled | ||
| 120 | * in on a per model basis. A value of -1 means | ||
| 121 | * 'not supported', any other value means the | ||
| 122 | * raw hw_event ID. | ||
| 123 | */ | ||
| 124 | #ifndef __tilegx__ | ||
| 125 | /* TILEPro hardware cache event map */ | ||
| 126 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
| 127 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 128 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
| 129 | [C(L1D)] = { | ||
| 130 | [C(OP_READ)] = { | ||
| 131 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 132 | [C(RESULT_MISS)] = 0x21, /* RD_MISS */ | ||
| 133 | }, | ||
| 134 | [C(OP_WRITE)] = { | ||
| 135 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 136 | [C(RESULT_MISS)] = 0x22, /* WR_MISS */ | ||
| 137 | }, | ||
| 138 | [C(OP_PREFETCH)] = { | ||
| 139 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 140 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 141 | }, | ||
| 142 | }, | ||
| 143 | [C(L1I)] = { | ||
| 144 | [C(OP_READ)] = { | ||
| 145 | [C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */ | ||
| 146 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 147 | }, | ||
| 148 | [C(OP_WRITE)] = { | ||
| 149 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 150 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 151 | }, | ||
| 152 | [C(OP_PREFETCH)] = { | ||
| 153 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 154 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 155 | }, | ||
| 156 | }, | ||
| 157 | [C(LL)] = { | ||
| 158 | [C(OP_READ)] = { | ||
| 159 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 160 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 161 | }, | ||
| 162 | [C(OP_WRITE)] = { | ||
| 163 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 164 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 165 | }, | ||
| 166 | [C(OP_PREFETCH)] = { | ||
| 167 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 168 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 169 | }, | ||
| 170 | }, | ||
| 171 | [C(DTLB)] = { | ||
| 172 | [C(OP_READ)] = { | ||
| 173 | [C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */ | ||
| 174 | [C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */ | ||
| 175 | }, | ||
| 176 | [C(OP_WRITE)] = { | ||
| 177 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 178 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 179 | }, | ||
| 180 | [C(OP_PREFETCH)] = { | ||
| 181 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 182 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 183 | }, | ||
| 184 | }, | ||
| 185 | [C(ITLB)] = { | ||
| 186 | [C(OP_READ)] = { | ||
| 187 | [C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */ | ||
| 188 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 189 | }, | ||
| 190 | [C(OP_WRITE)] = { | ||
| 191 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 192 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 193 | }, | ||
| 194 | [C(OP_PREFETCH)] = { | ||
| 195 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 196 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 197 | }, | ||
| 198 | }, | ||
| 199 | [C(BPU)] = { | ||
| 200 | [C(OP_READ)] = { | ||
| 201 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 202 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 203 | }, | ||
| 204 | [C(OP_WRITE)] = { | ||
| 205 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 206 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 207 | }, | ||
| 208 | [C(OP_PREFETCH)] = { | ||
| 209 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 210 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 211 | }, | ||
| 212 | }, | ||
| 213 | }; | ||
| 214 | #else | ||
| 215 | /* TILEGx hardware events map */ | ||
| 216 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
| 217 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 218 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
| 219 | [C(L1D)] = { | ||
| 220 | /* | ||
| 221 | * Like some other architectures (e.g. ARM), the performance | ||
| 222 | * counters don't differentiate between read and write | ||
| 223 | * accesses/misses, so this isn't strictly correct, but it's the | ||
| 224 | * best we can do. Writes and reads get combined. | ||
| 225 | */ | ||
| 226 | [C(OP_READ)] = { | ||
| 227 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 228 | [C(RESULT_MISS)] = 0x44, /* RD_MISS */ | ||
| 229 | }, | ||
| 230 | [C(OP_WRITE)] = { | ||
| 231 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 232 | [C(RESULT_MISS)] = 0x45, /* WR_MISS */ | ||
| 233 | }, | ||
| 234 | [C(OP_PREFETCH)] = { | ||
| 235 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 236 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 237 | }, | ||
| 238 | }, | ||
| 239 | [C(L1I)] = { | ||
| 240 | [C(OP_READ)] = { | ||
| 241 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 242 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 243 | }, | ||
| 244 | [C(OP_WRITE)] = { | ||
| 245 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 246 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 247 | }, | ||
| 248 | [C(OP_PREFETCH)] = { | ||
| 249 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 250 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 251 | }, | ||
| 252 | }, | ||
| 253 | [C(LL)] = { | ||
| 254 | [C(OP_READ)] = { | ||
| 255 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 256 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 257 | }, | ||
| 258 | [C(OP_WRITE)] = { | ||
| 259 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 260 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 261 | }, | ||
| 262 | [C(OP_PREFETCH)] = { | ||
| 263 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 264 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 265 | }, | ||
| 266 | }, | ||
| 267 | [C(DTLB)] = { | ||
| 268 | [C(OP_READ)] = { | ||
| 269 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
| 270 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
| 271 | }, | ||
| 272 | [C(OP_WRITE)] = { | ||
| 273 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
| 274 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
| 275 | }, | ||
| 276 | [C(OP_PREFETCH)] = { | ||
| 277 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 278 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 279 | }, | ||
| 280 | }, | ||
| 281 | [C(ITLB)] = { | ||
| 282 | [C(OP_READ)] = { | ||
| 283 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 284 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
| 285 | }, | ||
| 286 | [C(OP_WRITE)] = { | ||
| 287 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 288 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
| 289 | }, | ||
| 290 | [C(OP_PREFETCH)] = { | ||
| 291 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 292 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 293 | }, | ||
| 294 | }, | ||
| 295 | [C(BPU)] = { | ||
| 296 | [C(OP_READ)] = { | ||
| 297 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 298 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 299 | }, | ||
| 300 | [C(OP_WRITE)] = { | ||
| 301 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 302 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 303 | }, | ||
| 304 | [C(OP_PREFETCH)] = { | ||
| 305 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 306 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 307 | }, | ||
| 308 | }, | ||
| 309 | }; | ||
| 310 | #endif | ||
| 311 | |||
| 312 | static atomic_t tile_active_events; | ||
| 313 | static DEFINE_MUTEX(perf_intr_reserve_mutex); | ||
| 314 | |||
| 315 | static int tile_map_hw_event(u64 config); | ||
| 316 | static int tile_map_cache_event(u64 config); | ||
| 317 | |||
| 318 | static int tile_pmu_handle_irq(struct pt_regs *regs, int fault); | ||
| 319 | |||
| 320 | /* | ||
| 321 | * To avoid new_raw_count getting larger than pre_raw_count | ||
| 322 | * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1. | ||
| 323 | */ | ||
| 324 | static const struct tile_pmu tilepmu = { | ||
| 325 | #ifndef __tilegx__ | ||
| 326 | .name = "tilepro", | ||
| 327 | #else | ||
| 328 | .name = "tilegx", | ||
| 329 | #endif | ||
| 330 | .max_events = ARRAY_SIZE(tile_hw_event_map), | ||
| 331 | .map_hw_event = tile_map_hw_event, | ||
| 332 | .hw_events = tile_hw_event_map, | ||
| 333 | .map_cache_event = tile_map_cache_event, | ||
| 334 | .cache_events = &tile_cache_event_map, | ||
| 335 | .cntval_bits = 32, | ||
| 336 | .cntval_mask = (1ULL << 32) - 1, | ||
| 337 | .max_period = (1ULL << 31) - 1, | ||
| 338 | .num_counters = TILE_MAX_COUNTERS, | ||
| 339 | .num_base_counters = TILE_BASE_COUNTERS, | ||
| 340 | }; | ||
| 341 | |||
| 342 | static const struct tile_pmu *tile_pmu __read_mostly; | ||
| 343 | |||
| 344 | /* | ||
| 345 | * Check whether perf event is enabled. | ||
| 346 | */ | ||
| 347 | int tile_perf_enabled(void) | ||
| 348 | { | ||
| 349 | return atomic_read(&tile_active_events) != 0; | ||
| 350 | } | ||
| 351 | |||
| 352 | /* | ||
| 353 | * Read Performance Counters. | ||
| 354 | */ | ||
| 355 | static inline u64 read_counter(int idx) | ||
| 356 | { | ||
| 357 | u64 val = 0; | ||
| 358 | |||
| 359 | /* __insn_mfspr() only takes an immediate argument */ | ||
| 360 | switch (idx) { | ||
| 361 | case PERF_COUNT_0_IDX: | ||
| 362 | val = __insn_mfspr(SPR_PERF_COUNT_0); | ||
| 363 | break; | ||
| 364 | case PERF_COUNT_1_IDX: | ||
| 365 | val = __insn_mfspr(SPR_PERF_COUNT_1); | ||
| 366 | break; | ||
| 367 | case AUX_PERF_COUNT_0_IDX: | ||
| 368 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_0); | ||
| 369 | break; | ||
| 370 | case AUX_PERF_COUNT_1_IDX: | ||
| 371 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_1); | ||
| 372 | break; | ||
| 373 | default: | ||
| 374 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
| 375 | idx < PERF_COUNT_0_IDX); | ||
| 376 | } | ||
| 377 | |||
| 378 | return val; | ||
| 379 | } | ||
| 380 | |||
| 381 | /* | ||
| 382 | * Write Performance Counters. | ||
| 383 | */ | ||
| 384 | static inline void write_counter(int idx, u64 value) | ||
| 385 | { | ||
| 386 | /* __insn_mtspr() only takes an immediate argument */ | ||
| 387 | switch (idx) { | ||
| 388 | case PERF_COUNT_0_IDX: | ||
| 389 | __insn_mtspr(SPR_PERF_COUNT_0, value); | ||
| 390 | break; | ||
| 391 | case PERF_COUNT_1_IDX: | ||
| 392 | __insn_mtspr(SPR_PERF_COUNT_1, value); | ||
| 393 | break; | ||
| 394 | case AUX_PERF_COUNT_0_IDX: | ||
| 395 | __insn_mtspr(SPR_AUX_PERF_COUNT_0, value); | ||
| 396 | break; | ||
| 397 | case AUX_PERF_COUNT_1_IDX: | ||
| 398 | __insn_mtspr(SPR_AUX_PERF_COUNT_1, value); | ||
| 399 | break; | ||
| 400 | default: | ||
| 401 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
| 402 | idx < PERF_COUNT_0_IDX); | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Enable performance event by setting | ||
| 408 | * Performance Counter Control registers. | ||
| 409 | */ | ||
| 410 | static inline void tile_pmu_enable_event(struct perf_event *event) | ||
| 411 | { | ||
| 412 | struct hw_perf_event *hwc = &event->hw; | ||
| 413 | unsigned long cfg, mask; | ||
| 414 | int shift, idx = hwc->idx; | ||
| 415 | |||
| 416 | /* | ||
| 417 | * prevent early activation from tile_pmu_start() in hw_perf_enable | ||
| 418 | */ | ||
| 419 | |||
| 420 | if (WARN_ON_ONCE(idx == -1)) | ||
| 421 | return; | ||
| 422 | |||
| 423 | if (idx < tile_pmu->num_base_counters) | ||
| 424 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
| 425 | else | ||
| 426 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
| 427 | |||
| 428 | switch (idx) { | ||
| 429 | case PERF_COUNT_0_IDX: | ||
| 430 | case AUX_PERF_COUNT_0_IDX: | ||
| 431 | mask = TILE_EVENT_MASK; | ||
| 432 | shift = 0; | ||
| 433 | break; | ||
| 434 | case PERF_COUNT_1_IDX: | ||
| 435 | case AUX_PERF_COUNT_1_IDX: | ||
| 436 | mask = TILE_EVENT_MASK << 16; | ||
| 437 | shift = 16; | ||
| 438 | break; | ||
| 439 | default: | ||
| 440 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
| 441 | idx > AUX_PERF_COUNT_1_IDX); | ||
| 442 | return; | ||
| 443 | } | ||
| 444 | |||
| 445 | /* Clear mask bits to enable the event. */ | ||
| 446 | cfg &= ~mask; | ||
| 447 | cfg |= hwc->config << shift; | ||
| 448 | |||
| 449 | if (idx < tile_pmu->num_base_counters) | ||
| 450 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
| 451 | else | ||
| 452 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
| 453 | } | ||
| 454 | |||
| 455 | /* | ||
| 456 | * Disable performance event by clearing | ||
| 457 | * Performance Counter Control registers. | ||
| 458 | */ | ||
| 459 | static inline void tile_pmu_disable_event(struct perf_event *event) | ||
| 460 | { | ||
| 461 | struct hw_perf_event *hwc = &event->hw; | ||
| 462 | unsigned long cfg, mask; | ||
| 463 | int idx = hwc->idx; | ||
| 464 | |||
| 465 | if (idx == -1) | ||
| 466 | return; | ||
| 467 | |||
| 468 | if (idx < tile_pmu->num_base_counters) | ||
| 469 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
| 470 | else | ||
| 471 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
| 472 | |||
| 473 | switch (idx) { | ||
| 474 | case PERF_COUNT_0_IDX: | ||
| 475 | case AUX_PERF_COUNT_0_IDX: | ||
| 476 | mask = TILE_PLM_MASK; | ||
| 477 | break; | ||
| 478 | case PERF_COUNT_1_IDX: | ||
| 479 | case AUX_PERF_COUNT_1_IDX: | ||
| 480 | mask = TILE_PLM_MASK << 16; | ||
| 481 | break; | ||
| 482 | default: | ||
| 483 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
| 484 | idx > AUX_PERF_COUNT_1_IDX); | ||
| 485 | return; | ||
| 486 | } | ||
| 487 | |||
| 488 | /* Set mask bits to disable the event. */ | ||
| 489 | cfg |= mask; | ||
| 490 | |||
| 491 | if (idx < tile_pmu->num_base_counters) | ||
| 492 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
| 493 | else | ||
| 494 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
| 495 | } | ||
| 496 | |||
| 497 | /* | ||
| 498 | * Propagate event elapsed time into the generic event. | ||
| 499 | * Can only be executed on the CPU where the event is active. | ||
| 500 | * Returns the delta events processed. | ||
| 501 | */ | ||
| 502 | static u64 tile_perf_event_update(struct perf_event *event) | ||
| 503 | { | ||
| 504 | struct hw_perf_event *hwc = &event->hw; | ||
| 505 | int shift = 64 - tile_pmu->cntval_bits; | ||
| 506 | u64 prev_raw_count, new_raw_count; | ||
| 507 | u64 oldval; | ||
| 508 | int idx = hwc->idx; | ||
| 509 | u64 delta; | ||
| 510 | |||
| 511 | /* | ||
| 512 | * Careful: an NMI might modify the previous event value. | ||
| 513 | * | ||
| 514 | * Our tactic to handle this is to first atomically read and | ||
| 515 | * exchange a new raw count - then add that new-prev delta | ||
| 516 | * count to the generic event atomically: | ||
| 517 | */ | ||
| 518 | again: | ||
| 519 | prev_raw_count = local64_read(&hwc->prev_count); | ||
| 520 | new_raw_count = read_counter(idx); | ||
| 521 | |||
| 522 | oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
| 523 | new_raw_count); | ||
| 524 | if (oldval != prev_raw_count) | ||
| 525 | goto again; | ||
| 526 | |||
| 527 | /* | ||
| 528 | * Now we have the new raw value and have updated the prev | ||
| 529 | * timestamp already. We can now calculate the elapsed delta | ||
| 530 | * (event-)time and add that to the generic event. | ||
| 531 | * | ||
| 532 | * Careful, not all hw sign-extends above the physical width | ||
| 533 | * of the count. | ||
| 534 | */ | ||
| 535 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
| 536 | delta >>= shift; | ||
| 537 | |||
| 538 | local64_add(delta, &event->count); | ||
| 539 | local64_sub(delta, &hwc->period_left); | ||
| 540 | |||
| 541 | return new_raw_count; | ||
| 542 | } | ||
| 543 | |||
| 544 | /* | ||
| 545 | * Set the next IRQ period, based on the hwc->period_left value. | ||
| 546 | * To be called with the event disabled in hw: | ||
| 547 | */ | ||
| 548 | static int tile_event_set_period(struct perf_event *event) | ||
| 549 | { | ||
| 550 | struct hw_perf_event *hwc = &event->hw; | ||
| 551 | int idx = hwc->idx; | ||
| 552 | s64 left = local64_read(&hwc->period_left); | ||
| 553 | s64 period = hwc->sample_period; | ||
| 554 | int ret = 0; | ||
| 555 | |||
| 556 | /* | ||
| 557 | * If we are way outside a reasonable range then just skip forward: | ||
| 558 | */ | ||
| 559 | if (unlikely(left <= -period)) { | ||
| 560 | left = period; | ||
| 561 | local64_set(&hwc->period_left, left); | ||
| 562 | hwc->last_period = period; | ||
| 563 | ret = 1; | ||
| 564 | } | ||
| 565 | |||
| 566 | if (unlikely(left <= 0)) { | ||
| 567 | left += period; | ||
| 568 | local64_set(&hwc->period_left, left); | ||
| 569 | hwc->last_period = period; | ||
| 570 | ret = 1; | ||
| 571 | } | ||
| 572 | if (left > tile_pmu->max_period) | ||
| 573 | left = tile_pmu->max_period; | ||
| 574 | |||
| 575 | /* | ||
| 576 | * The hw event starts counting from this event offset, | ||
| 577 | * mark it to be able to extra future deltas: | ||
| 578 | */ | ||
| 579 | local64_set(&hwc->prev_count, (u64)-left); | ||
| 580 | |||
| 581 | write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask); | ||
| 582 | |||
| 583 | perf_event_update_userpage(event); | ||
| 584 | |||
| 585 | return ret; | ||
| 586 | } | ||
| 587 | |||
| 588 | /* | ||
| 589 | * Stop the event but do not release the PMU counter | ||
| 590 | */ | ||
| 591 | static void tile_pmu_stop(struct perf_event *event, int flags) | ||
| 592 | { | ||
| 593 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 594 | struct hw_perf_event *hwc = &event->hw; | ||
| 595 | int idx = hwc->idx; | ||
| 596 | |||
| 597 | if (__test_and_clear_bit(idx, cpuc->active_mask)) { | ||
| 598 | tile_pmu_disable_event(event); | ||
| 599 | cpuc->events[hwc->idx] = NULL; | ||
| 600 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
| 601 | hwc->state |= PERF_HES_STOPPED; | ||
| 602 | } | ||
| 603 | |||
| 604 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
| 605 | /* | ||
| 606 | * Drain the remaining delta count out of a event | ||
| 607 | * that we are disabling: | ||
| 608 | */ | ||
| 609 | tile_perf_event_update(event); | ||
| 610 | hwc->state |= PERF_HES_UPTODATE; | ||
| 611 | } | ||
| 612 | } | ||
| 613 | |||
| 614 | /* | ||
| 615 | * Start an event (without re-assigning counter) | ||
| 616 | */ | ||
| 617 | static void tile_pmu_start(struct perf_event *event, int flags) | ||
| 618 | { | ||
| 619 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 620 | int idx = event->hw.idx; | ||
| 621 | |||
| 622 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
| 623 | return; | ||
| 624 | |||
| 625 | if (WARN_ON_ONCE(idx == -1)) | ||
| 626 | return; | ||
| 627 | |||
| 628 | if (flags & PERF_EF_RELOAD) { | ||
| 629 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
| 630 | tile_event_set_period(event); | ||
| 631 | } | ||
| 632 | |||
| 633 | event->hw.state = 0; | ||
| 634 | |||
| 635 | cpuc->events[idx] = event; | ||
| 636 | __set_bit(idx, cpuc->active_mask); | ||
| 637 | |||
| 638 | unmask_pmc_interrupts(); | ||
| 639 | |||
| 640 | tile_pmu_enable_event(event); | ||
| 641 | |||
| 642 | perf_event_update_userpage(event); | ||
| 643 | } | ||
| 644 | |||
| 645 | /* | ||
| 646 | * Add a single event to the PMU. | ||
| 647 | * | ||
| 648 | * The event is added to the group of enabled events | ||
| 649 | * but only if it can be scehduled with existing events. | ||
| 650 | */ | ||
| 651 | static int tile_pmu_add(struct perf_event *event, int flags) | ||
| 652 | { | ||
| 653 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 654 | struct hw_perf_event *hwc; | ||
| 655 | unsigned long mask; | ||
| 656 | int b, max_cnt; | ||
| 657 | |||
| 658 | hwc = &event->hw; | ||
| 659 | |||
| 660 | /* | ||
| 661 | * We are full. | ||
| 662 | */ | ||
| 663 | if (cpuc->n_events == tile_pmu->num_counters) | ||
| 664 | return -ENOSPC; | ||
| 665 | |||
| 666 | cpuc->event_list[cpuc->n_events] = event; | ||
| 667 | cpuc->n_events++; | ||
| 668 | |||
| 669 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
| 670 | if (!(flags & PERF_EF_START)) | ||
| 671 | hwc->state |= PERF_HES_ARCH; | ||
| 672 | |||
| 673 | /* | ||
| 674 | * Find first empty counter. | ||
| 675 | */ | ||
| 676 | max_cnt = tile_pmu->num_counters; | ||
| 677 | mask = ~cpuc->used_mask; | ||
| 678 | |||
| 679 | /* Find next free counter. */ | ||
| 680 | b = find_next_bit(&mask, max_cnt, 0); | ||
| 681 | |||
| 682 | /* Should not happen. */ | ||
| 683 | if (WARN_ON_ONCE(b == max_cnt)) | ||
| 684 | return -ENOSPC; | ||
| 685 | |||
| 686 | /* | ||
| 687 | * Assign counter to event. | ||
| 688 | */ | ||
| 689 | event->hw.idx = b; | ||
| 690 | __set_bit(b, &cpuc->used_mask); | ||
| 691 | |||
| 692 | /* | ||
| 693 | * Start if requested. | ||
| 694 | */ | ||
| 695 | if (flags & PERF_EF_START) | ||
| 696 | tile_pmu_start(event, PERF_EF_RELOAD); | ||
| 697 | |||
| 698 | return 0; | ||
| 699 | } | ||
| 700 | |||
| 701 | /* | ||
| 702 | * Delete a single event from the PMU. | ||
| 703 | * | ||
| 704 | * The event is deleted from the group of enabled events. | ||
| 705 | * If it is the last event, disable PMU interrupt. | ||
| 706 | */ | ||
| 707 | static void tile_pmu_del(struct perf_event *event, int flags) | ||
| 708 | { | ||
| 709 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 710 | int i; | ||
| 711 | |||
| 712 | /* | ||
| 713 | * Remove event from list, compact list if necessary. | ||
| 714 | */ | ||
| 715 | for (i = 0; i < cpuc->n_events; i++) { | ||
| 716 | if (cpuc->event_list[i] == event) { | ||
| 717 | while (++i < cpuc->n_events) | ||
| 718 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
| 719 | --cpuc->n_events; | ||
| 720 | cpuc->events[event->hw.idx] = NULL; | ||
| 721 | __clear_bit(event->hw.idx, &cpuc->used_mask); | ||
| 722 | tile_pmu_stop(event, PERF_EF_UPDATE); | ||
| 723 | break; | ||
| 724 | } | ||
| 725 | } | ||
| 726 | /* | ||
| 727 | * If there are no events left, then mask PMU interrupt. | ||
| 728 | */ | ||
| 729 | if (cpuc->n_events == 0) | ||
| 730 | mask_pmc_interrupts(); | ||
| 731 | perf_event_update_userpage(event); | ||
| 732 | } | ||
| 733 | |||
/*
 * pmu->read callback: bring the event's count up to date.
 * The raw counter value returned by the update is not needed here.
 */
static inline void tile_pmu_read(struct perf_event *event)
{
	(void)tile_perf_event_update(event);
}
| 741 | |||
| 742 | /* | ||
| 743 | * Map generic events to Tile PMU. | ||
| 744 | */ | ||
| 745 | static int tile_map_hw_event(u64 config) | ||
| 746 | { | ||
| 747 | if (config >= tile_pmu->max_events) | ||
| 748 | return -EINVAL; | ||
| 749 | return tile_pmu->hw_events[config]; | ||
| 750 | } | ||
| 751 | |||
| 752 | /* | ||
| 753 | * Map generic hardware cache events to Tile PMU. | ||
| 754 | */ | ||
| 755 | static int tile_map_cache_event(u64 config) | ||
| 756 | { | ||
| 757 | unsigned int cache_type, cache_op, cache_result; | ||
| 758 | int code; | ||
| 759 | |||
| 760 | if (!tile_pmu->cache_events) | ||
| 761 | return -ENOENT; | ||
| 762 | |||
| 763 | cache_type = (config >> 0) & 0xff; | ||
| 764 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
| 765 | return -EINVAL; | ||
| 766 | |||
| 767 | cache_op = (config >> 8) & 0xff; | ||
| 768 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
| 769 | return -EINVAL; | ||
| 770 | |||
| 771 | cache_result = (config >> 16) & 0xff; | ||
| 772 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
| 773 | return -EINVAL; | ||
| 774 | |||
| 775 | code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result]; | ||
| 776 | if (code == TILE_OP_UNSUPP) | ||
| 777 | return -EINVAL; | ||
| 778 | |||
| 779 | return code; | ||
| 780 | } | ||
| 781 | |||
| 782 | static void tile_event_destroy(struct perf_event *event) | ||
| 783 | { | ||
| 784 | if (atomic_dec_return(&tile_active_events) == 0) | ||
| 785 | release_pmc_hardware(); | ||
| 786 | } | ||
| 787 | |||
| 788 | static int __tile_event_init(struct perf_event *event) | ||
| 789 | { | ||
| 790 | struct perf_event_attr *attr = &event->attr; | ||
| 791 | struct hw_perf_event *hwc = &event->hw; | ||
| 792 | int code; | ||
| 793 | |||
| 794 | switch (attr->type) { | ||
| 795 | case PERF_TYPE_HARDWARE: | ||
| 796 | code = tile_pmu->map_hw_event(attr->config); | ||
| 797 | break; | ||
| 798 | case PERF_TYPE_HW_CACHE: | ||
| 799 | code = tile_pmu->map_cache_event(attr->config); | ||
| 800 | break; | ||
| 801 | case PERF_TYPE_RAW: | ||
| 802 | code = attr->config & TILE_EVENT_MASK; | ||
| 803 | break; | ||
| 804 | default: | ||
| 805 | /* Should not happen. */ | ||
| 806 | return -EOPNOTSUPP; | ||
| 807 | } | ||
| 808 | |||
| 809 | if (code < 0) | ||
| 810 | return code; | ||
| 811 | |||
| 812 | hwc->config = code; | ||
| 813 | hwc->idx = -1; | ||
| 814 | |||
| 815 | if (attr->exclude_user) | ||
| 816 | hwc->config |= TILE_CTL_EXCL_USER; | ||
| 817 | |||
| 818 | if (attr->exclude_kernel) | ||
| 819 | hwc->config |= TILE_CTL_EXCL_KERNEL; | ||
| 820 | |||
| 821 | if (attr->exclude_hv) | ||
| 822 | hwc->config |= TILE_CTL_EXCL_HV; | ||
| 823 | |||
| 824 | if (!hwc->sample_period) { | ||
| 825 | hwc->sample_period = tile_pmu->max_period; | ||
| 826 | hwc->last_period = hwc->sample_period; | ||
| 827 | local64_set(&hwc->period_left, hwc->sample_period); | ||
| 828 | } | ||
| 829 | event->destroy = tile_event_destroy; | ||
| 830 | return 0; | ||
| 831 | } | ||
| 832 | |||
| 833 | static int tile_event_init(struct perf_event *event) | ||
| 834 | { | ||
| 835 | int err = 0; | ||
| 836 | perf_irq_t old_irq_handler = NULL; | ||
| 837 | |||
| 838 | if (atomic_inc_return(&tile_active_events) == 1) | ||
| 839 | old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq); | ||
| 840 | |||
| 841 | if (old_irq_handler) { | ||
| 842 | pr_warn("PMC hardware busy (reserved by oprofile)\n"); | ||
| 843 | |||
| 844 | atomic_dec(&tile_active_events); | ||
| 845 | return -EBUSY; | ||
| 846 | } | ||
| 847 | |||
| 848 | switch (event->attr.type) { | ||
| 849 | case PERF_TYPE_RAW: | ||
| 850 | case PERF_TYPE_HARDWARE: | ||
| 851 | case PERF_TYPE_HW_CACHE: | ||
| 852 | break; | ||
| 853 | |||
| 854 | default: | ||
| 855 | return -ENOENT; | ||
| 856 | } | ||
| 857 | |||
| 858 | err = __tile_event_init(event); | ||
| 859 | if (err) { | ||
| 860 | if (event->destroy) | ||
| 861 | event->destroy(event); | ||
| 862 | } | ||
| 863 | return err; | ||
| 864 | } | ||
| 865 | |||
| 866 | static struct pmu tilera_pmu = { | ||
| 867 | .event_init = tile_event_init, | ||
| 868 | .add = tile_pmu_add, | ||
| 869 | .del = tile_pmu_del, | ||
| 870 | |||
| 871 | .start = tile_pmu_start, | ||
| 872 | .stop = tile_pmu_stop, | ||
| 873 | |||
| 874 | .read = tile_pmu_read, | ||
| 875 | }; | ||
| 876 | |||
| 877 | /* | ||
| 878 | * PMU's IRQ handler, PMU has 2 interrupts, they share the same handler. | ||
| 879 | */ | ||
| 880 | int tile_pmu_handle_irq(struct pt_regs *regs, int fault) | ||
| 881 | { | ||
| 882 | struct perf_sample_data data; | ||
| 883 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 884 | struct perf_event *event; | ||
| 885 | struct hw_perf_event *hwc; | ||
| 886 | u64 val; | ||
| 887 | unsigned long status; | ||
| 888 | int bit; | ||
| 889 | |||
| 890 | __get_cpu_var(perf_irqs)++; | ||
| 891 | |||
| 892 | if (!atomic_read(&tile_active_events)) | ||
| 893 | return 0; | ||
| 894 | |||
| 895 | status = pmc_get_overflow(); | ||
| 896 | pmc_ack_overflow(status); | ||
| 897 | |||
| 898 | for_each_set_bit(bit, &status, tile_pmu->num_counters) { | ||
| 899 | |||
| 900 | event = cpuc->events[bit]; | ||
| 901 | |||
| 902 | if (!event) | ||
| 903 | continue; | ||
| 904 | |||
| 905 | if (!test_bit(bit, cpuc->active_mask)) | ||
| 906 | continue; | ||
| 907 | |||
| 908 | hwc = &event->hw; | ||
| 909 | |||
| 910 | val = tile_perf_event_update(event); | ||
| 911 | if (val & (1ULL << (tile_pmu->cntval_bits - 1))) | ||
| 912 | continue; | ||
| 913 | |||
| 914 | perf_sample_data_init(&data, 0, event->hw.last_period); | ||
| 915 | if (!tile_event_set_period(event)) | ||
| 916 | continue; | ||
| 917 | |||
| 918 | if (perf_event_overflow(event, &data, regs)) | ||
| 919 | tile_pmu_stop(event, 0); | ||
| 920 | } | ||
| 921 | |||
| 922 | return 0; | ||
| 923 | } | ||
| 924 | |||
| 925 | static bool __init supported_pmu(void) | ||
| 926 | { | ||
| 927 | tile_pmu = &tilepmu; | ||
| 928 | return true; | ||
| 929 | } | ||
| 930 | |||
| 931 | int __init init_hw_perf_events(void) | ||
| 932 | { | ||
| 933 | supported_pmu(); | ||
| 934 | perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW); | ||
| 935 | return 0; | ||
| 936 | } | ||
| 937 | arch_initcall(init_hw_perf_events); | ||
| 938 | |||
| 939 | /* Callchain handling code. */ | ||
| 940 | |||
| 941 | /* | ||
| 942 | * Tile specific backtracing code for perf_events. | ||
| 943 | */ | ||
| 944 | static inline void perf_callchain(struct perf_callchain_entry *entry, | ||
| 945 | struct pt_regs *regs) | ||
| 946 | { | ||
| 947 | struct KBacktraceIterator kbt; | ||
| 948 | unsigned int i; | ||
| 949 | |||
| 950 | /* | ||
| 951 | * Get the address just after the "jalr" instruction that | ||
| 952 | * jumps to the handler for a syscall. When we find this | ||
| 953 | * address in a backtrace, we silently ignore it, which gives | ||
| 954 | * us a one-step backtrace connection from the sys_xxx() | ||
| 955 | * function in the kernel to the xxx() function in libc. | ||
| 956 | * Otherwise, we lose the ability to properly attribute time | ||
| 957 | * from the libc calls to the kernel implementations, since | ||
| 958 | * oprofile only considers PCs from backtraces a pair at a time. | ||
| 959 | */ | ||
| 960 | unsigned long handle_syscall_pc = handle_syscall_link_address(); | ||
| 961 | |||
| 962 | KBacktraceIterator_init(&kbt, NULL, regs); | ||
| 963 | kbt.profile = 1; | ||
| 964 | |||
| 965 | /* | ||
| 966 | * The sample for the pc is already recorded. Now we are adding the | ||
| 967 | * address of the callsites on the stack. Our iterator starts | ||
| 968 | * with the frame of the (already sampled) call site. If our | ||
| 969 | * iterator contained a "return address" field, we could have just | ||
| 970 | * used it and wouldn't have needed to skip the first | ||
| 971 | * frame. That's in effect what the arm and x86 versions do. | ||
| 972 | * Instead we peel off the first iteration to get the equivalent | ||
| 973 | * behavior. | ||
| 974 | */ | ||
| 975 | |||
| 976 | if (KBacktraceIterator_end(&kbt)) | ||
| 977 | return; | ||
| 978 | KBacktraceIterator_next(&kbt); | ||
| 979 | |||
| 980 | /* | ||
| 981 | * Set stack depth to 16 for user and kernel space respectively, that | ||
| 982 | * is, total 32 stack frames. | ||
| 983 | */ | ||
| 984 | for (i = 0; i < 16; ++i) { | ||
| 985 | unsigned long pc; | ||
| 986 | if (KBacktraceIterator_end(&kbt)) | ||
| 987 | break; | ||
| 988 | pc = kbt.it.pc; | ||
| 989 | if (pc != handle_syscall_pc) | ||
| 990 | perf_callchain_store(entry, pc); | ||
| 991 | KBacktraceIterator_next(&kbt); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
/*
 * Record the callchain for @regs into @entry.
 * Shares its implementation with perf_callchain_kernel().
 */
void perf_callchain_user(struct perf_callchain_entry *entry,
			 struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
| 1000 | |||
/*
 * Record the callchain for @regs into @entry.
 * Shares its implementation with perf_callchain_user().
 */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
diff --git a/arch/tile/kernel/pmc.c b/arch/tile/kernel/pmc.c new file mode 100644 index 000000000000..db62cc34b955 --- /dev/null +++ b/arch/tile/kernel/pmc.c | |||
| @@ -0,0 +1,121 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation, version 2. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, but | ||
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/errno.h> | ||
| 16 | #include <linux/spinlock.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/atomic.h> | ||
| 19 | #include <linux/interrupt.h> | ||
| 20 | |||
| 21 | #include <asm/processor.h> | ||
| 22 | #include <asm/pmc.h> | ||
| 23 | |||
| 24 | perf_irq_t perf_irq = NULL; | ||
| 25 | int handle_perf_interrupt(struct pt_regs *regs, int fault) | ||
| 26 | { | ||
| 27 | int retval; | ||
| 28 | |||
| 29 | if (!perf_irq) | ||
| 30 | panic("Unexpected PERF_COUNT interrupt %d\n", fault); | ||
| 31 | |||
| 32 | nmi_enter(); | ||
| 33 | retval = perf_irq(regs, fault); | ||
| 34 | nmi_exit(); | ||
| 35 | return retval; | ||
| 36 | } | ||
| 37 | |||
| 38 | /* Reserve PMC hardware if it is available. */ | ||
| 39 | perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq) | ||
| 40 | { | ||
| 41 | return cmpxchg(&perf_irq, NULL, new_perf_irq); | ||
| 42 | } | ||
| 43 | EXPORT_SYMBOL(reserve_pmc_hardware); | ||
| 44 | |||
| 45 | /* Release PMC hardware. */ | ||
| 46 | void release_pmc_hardware(void) | ||
| 47 | { | ||
| 48 | perf_irq = NULL; | ||
| 49 | } | ||
| 50 | EXPORT_SYMBOL(release_pmc_hardware); | ||
| 51 | |||
| 52 | |||
| 53 | /* | ||
| 54 | * Get current overflow status of each performance counter, | ||
| 55 | * and auxiliary performance counter. | ||
| 56 | */ | ||
| 57 | unsigned long | ||
| 58 | pmc_get_overflow(void) | ||
| 59 | { | ||
| 60 | unsigned long status; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * merge base+aux into a single vector | ||
| 64 | */ | ||
| 65 | status = __insn_mfspr(SPR_PERF_COUNT_STS); | ||
| 66 | status |= __insn_mfspr(SPR_AUX_PERF_COUNT_STS) << TILE_BASE_COUNTERS; | ||
| 67 | return status; | ||
| 68 | } | ||
| 69 | |||
| 70 | /* | ||
| 71 | * Clear the status bit for the corresponding counter, if written | ||
| 72 | * with a one. | ||
| 73 | */ | ||
| 74 | void | ||
| 75 | pmc_ack_overflow(unsigned long status) | ||
| 76 | { | ||
| 77 | /* | ||
| 78 | * clear overflow status by writing ones | ||
| 79 | */ | ||
| 80 | __insn_mtspr(SPR_PERF_COUNT_STS, status); | ||
| 81 | __insn_mtspr(SPR_AUX_PERF_COUNT_STS, status >> TILE_BASE_COUNTERS); | ||
| 82 | } | ||
| 83 | |||
| 84 | /* | ||
| 85 | * The perf count interrupts are masked and unmasked explicitly, | ||
| 86 | * and only here. The normal irq_enable() does not enable them, | ||
| 87 | * and irq_disable() does not disable them. That lets these | ||
| 88 | * routines drive the perf count interrupts orthogonally. | ||
| 89 | * | ||
| 90 | * We also mask the perf count interrupts on entry to the perf count | ||
| 91 | * interrupt handler in assembly code, and by default unmask them | ||
| 92 | * again (with interrupt critical section protection) just before | ||
| 93 | * returning from the interrupt. If the perf count handler returns | ||
| 94 | * a non-zero error code, then we don't re-enable them before returning. | ||
| 95 | * | ||
| 96 | * For Pro, we rely on both interrupts being in the same word to update | ||
| 97 | * them atomically so we never have one enabled and one disabled. | ||
| 98 | */ | ||
| 99 | |||
| 100 | #if CHIP_HAS_SPLIT_INTR_MASK() | ||
| 101 | # if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 | ||
| 102 | # error Fix assumptions about which word PERF_COUNT interrupts are in | ||
| 103 | # endif | ||
| 104 | #endif | ||
| 105 | |||
| 106 | static inline unsigned long long pmc_mask(void) | ||
| 107 | { | ||
| 108 | unsigned long long mask = 1ULL << INT_PERF_COUNT; | ||
| 109 | mask |= 1ULL << INT_AUX_PERF_COUNT; | ||
| 110 | return mask; | ||
| 111 | } | ||
| 112 | |||
/* Unmask both perf count interrupts. */
void unmask_pmc_interrupts(void)
{
	unsigned long long both = pmc_mask();

	interrupt_mask_reset_mask(both);
}
| 117 | |||
/* Mask both perf count interrupts. */
void mask_pmc_interrupts(void)
{
	unsigned long long both = pmc_mask();

	interrupt_mask_set_mask(both);
}
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 5d10642db63e..462dcd0c1700 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c | |||
| @@ -236,7 +236,15 @@ cycles_t ns2cycles(unsigned long nsecs) | |||
| 236 | * clock frequency. | 236 | * clock frequency. |
| 237 | */ | 237 | */ |
| 238 | struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); | 238 | struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); |
| 239 | return ((u64)nsecs * dev->mult) >> dev->shift; | 239 | |
| 240 | /* | ||
| 241 | * as in clocksource.h and x86's timer.h, we split the calculation | ||
| 242 | * into 2 parts to avoid unecessary overflow of the intermediate | ||
| 243 | * value. This will not lead to any loss of precision. | ||
| 244 | */ | ||
| 245 | u64 quot = (u64)nsecs >> dev->shift; | ||
| 246 | u64 rem = (u64)nsecs & ((1ULL << dev->shift) - 1); | ||
| 247 | return quot * dev->mult + ((rem * dev->mult) >> dev->shift); | ||
| 240 | } | 248 | } |
| 241 | 249 | ||
| 242 | void update_vsyscall_tz(void) | 250 | void update_vsyscall_tz(void) |
diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile index e2b7a2f4ee41..a025f63d54cd 100644 --- a/arch/tile/kernel/vdso/Makefile +++ b/arch/tile/kernel/vdso/Makefile | |||
| @@ -104,7 +104,7 @@ $(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) | |||
| 104 | $(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) | 104 | $(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) |
| 105 | 105 | ||
| 106 | $(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c | 106 | $(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c |
| 107 | $(call if_changed,cc_o_c) | 107 | $(call if_changed_rule,cc_o_c) |
| 108 | 108 | ||
| 109 | $(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S | 109 | $(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S |
| 110 | $(call if_changed,as_o_S) | 110 | $(call if_changed,as_o_S) |
diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index fef8ae922800..4b06719ee984 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch | |||
| @@ -5,7 +5,8 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ | |||
| 5 | -e s/arm.*/arm/ -e s/sa110/arm/ \ | 5 | -e s/arm.*/arm/ -e s/sa110/arm/ \ |
| 6 | -e s/s390x/s390/ -e s/parisc64/parisc/ \ | 6 | -e s/s390x/s390/ -e s/parisc64/parisc/ \ |
| 7 | -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ | 7 | -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ |
| 8 | -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) | 8 | -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ |
| 9 | -e s/tile.*/tile/ ) | ||
| 9 | 10 | ||
| 10 | # Additional ARCH settings for x86 | 11 | # Additional ARCH settings for x86 |
| 11 | ifeq ($(ARCH),i386) | 12 | ifeq ($(ARCH),i386) |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e18a8b5e6953..5c11ecad02a9 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
| @@ -145,6 +145,14 @@ | |||
| 145 | #define CPUINFO_PROC "core ID" | 145 | #define CPUINFO_PROC "core ID" |
| 146 | #endif | 146 | #endif |
| 147 | 147 | ||
| 148 | #ifdef __tile__ | ||
| 149 | #define mb() asm volatile ("mf" ::: "memory") | ||
| 150 | #define wmb() asm volatile ("mf" ::: "memory") | ||
| 151 | #define rmb() asm volatile ("mf" ::: "memory") | ||
| 152 | #define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory") | ||
| 153 | #define CPUINFO_PROC "model name" | ||
| 154 | #endif | ||
| 155 | |||
| 148 | #define barrier() asm volatile ("" ::: "memory") | 156 | #define barrier() asm volatile ("" ::: "memory") |
| 149 | 157 | ||
| 150 | #ifndef cpu_relax | 158 | #ifndef cpu_relax |
