diff options
| author | Zhigang Lu <zlu@tilera.com> | 2014-01-27 21:03:50 -0500 |
|---|---|---|
| committer | Chris Metcalf <cmetcalf@tilera.com> | 2014-03-07 11:19:48 -0500 |
| commit | 8d61dd7d3e374eb52a174ab04169b04e3d9d729f (patch) | |
| tree | 0312a9743c802bc329ebcb6ec7952727a5adc204 | |
| parent | ba67823163c963de7f1f2d87526c9c87f3a3ea0b (diff) | |
tile/perf: Support perf_events on tilegx and tilepro
Add perf support for tile architecture.
Signed-off-by: Zhigang Lu <zlu@tilera.com>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
| -rw-r--r-- | arch/tile/Kconfig | 2 | ||||
| -rw-r--r-- | arch/tile/include/asm/perf_event.h | 22 | ||||
| -rw-r--r-- | arch/tile/kernel/Makefile | 1 | ||||
| -rw-r--r-- | arch/tile/kernel/irq.c | 18 | ||||
| -rw-r--r-- | arch/tile/kernel/perf_event.c | 1005 |
5 files changed, 1048 insertions, 0 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 3067b15e80d6..31c8c6223995 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | config TILE | 4 | config TILE |
| 5 | def_bool y | 5 | def_bool y |
| 6 | select HAVE_PERF_EVENTS | ||
| 7 | select USE_PMC if PERF_EVENTS | ||
| 6 | select HAVE_DMA_ATTRS | 8 | select HAVE_DMA_ATTRS |
| 7 | select HAVE_DMA_API_DEBUG | 9 | select HAVE_DMA_API_DEBUG |
| 8 | select HAVE_KVM if !TILEGX | 10 | select HAVE_KVM if !TILEGX |
diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h new file mode 100644 index 000000000000..59c5b164e5b6 --- /dev/null +++ b/arch/tile/include/asm/perf_event.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation, version 2. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, but | ||
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef _ASM_TILE_PERF_EVENT_H | ||
| 16 | #define _ASM_TILE_PERF_EVENT_H | ||
| 17 | |||
| 18 | #include <linux/percpu.h> | ||
| 19 | DECLARE_PER_CPU(u64, perf_irqs); | ||
| 20 | |||
| 21 | unsigned long handle_syscall_link_address(void); | ||
| 22 | #endif /* _ASM_TILE_PERF_EVENT_H */ | ||
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 71d835365c73..21f77bf68c69 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile | |||
| @@ -25,6 +25,7 @@ obj-$(CONFIG_PCI) += pci_gx.o | |||
| 25 | else | 25 | else |
| 26 | obj-$(CONFIG_PCI) += pci.o | 26 | obj-$(CONFIG_PCI) += pci.o |
| 27 | endif | 27 | endif |
| 28 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | ||
| 28 | obj-$(CONFIG_USE_PMC) += pmc.o | 29 | obj-$(CONFIG_USE_PMC) += pmc.o |
| 29 | obj-$(CONFIG_TILE_USB) += usb.o | 30 | obj-$(CONFIG_TILE_USB) += usb.o |
| 30 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o | 31 | obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o |
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 0586fdb9352d..906a76bdb31d 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <hv/drv_pcie_rc_intf.h> | 21 | #include <hv/drv_pcie_rc_intf.h> |
| 22 | #include <arch/spr_def.h> | 22 | #include <arch/spr_def.h> |
| 23 | #include <asm/traps.h> | 23 | #include <asm/traps.h> |
| 24 | #include <linux/perf_event.h> | ||
| 24 | 25 | ||
| 25 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ | 26 | /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ |
| 26 | #define IS_HW_CLEARED 1 | 27 | #define IS_HW_CLEARED 1 |
| @@ -261,6 +262,23 @@ void ack_bad_irq(unsigned int irq) | |||
| 261 | } | 262 | } |
| 262 | 263 | ||
| 263 | /* | 264 | /* |
| 265 | * /proc/interrupts printing: | ||
| 266 | */ | ||
| 267 | int arch_show_interrupts(struct seq_file *p, int prec) | ||
| 268 | { | ||
| 269 | #ifdef CONFIG_PERF_EVENTS | ||
| 270 | int i; | ||
| 271 | |||
| 272 | seq_printf(p, "%*s: ", prec, "PMI"); | ||
| 273 | |||
| 274 | for_each_online_cpu(i) | ||
| 275 | seq_printf(p, "%10llu ", per_cpu(perf_irqs, i)); | ||
| 276 | seq_puts(p, " perf_events\n"); | ||
| 277 | #endif | ||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 281 | /* | ||
| 264 | * Generic, controller-independent functions: | 282 | * Generic, controller-independent functions: |
| 265 | */ | 283 | */ |
| 266 | 284 | ||
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c new file mode 100644 index 000000000000..2bf6c9c135c1 --- /dev/null +++ b/arch/tile/kernel/perf_event.c | |||
| @@ -0,0 +1,1005 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2014 Tilera Corporation. All Rights Reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation, version 2. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, but | ||
| 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | * | ||
| 14 | * | ||
| 15 | * Perf_events support for Tile processor. | ||
| 16 | * | ||
| 17 | * This code is based upon the x86 perf event | ||
| 18 | * code, which is: | ||
| 19 | * | ||
| 20 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
| 21 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
| 22 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
| 23 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
| 24 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 25 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
| 26 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
| 27 | */ | ||
| 28 | |||
| 29 | #include <linux/kprobes.h> | ||
| 30 | #include <linux/kernel.h> | ||
| 31 | #include <linux/kdebug.h> | ||
| 32 | #include <linux/mutex.h> | ||
| 33 | #include <linux/bitmap.h> | ||
| 34 | #include <linux/irq.h> | ||
| 35 | #include <linux/interrupt.h> | ||
| 36 | #include <linux/perf_event.h> | ||
| 37 | #include <linux/atomic.h> | ||
| 38 | #include <asm/traps.h> | ||
| 39 | #include <asm/stack.h> | ||
| 40 | #include <asm/pmc.h> | ||
| 41 | #include <hv/hypervisor.h> | ||
| 42 | |||
| 43 | #define TILE_MAX_COUNTERS 4 | ||
| 44 | |||
| 45 | #define PERF_COUNT_0_IDX 0 | ||
| 46 | #define PERF_COUNT_1_IDX 1 | ||
| 47 | #define AUX_PERF_COUNT_0_IDX 2 | ||
| 48 | #define AUX_PERF_COUNT_1_IDX 3 | ||
| 49 | |||
| 50 | struct cpu_hw_events { | ||
| 51 | int n_events; | ||
| 52 | struct perf_event *events[TILE_MAX_COUNTERS]; /* counter order */ | ||
| 53 | struct perf_event *event_list[TILE_MAX_COUNTERS]; /* enabled | ||
| 54 | order */ | ||
| 55 | int assign[TILE_MAX_COUNTERS]; | ||
| 56 | unsigned long active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)]; | ||
| 57 | unsigned long used_mask; | ||
| 58 | }; | ||
| 59 | |||
| 60 | /* TILE arch specific performance monitor unit */ | ||
| 61 | struct tile_pmu { | ||
| 62 | const char *name; | ||
| 63 | int version; | ||
| 64 | const int *hw_events; /* generic hw events table */ | ||
| 65 | /* generic hw cache events table */ | ||
| 66 | const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] | ||
| 67 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 68 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
| 69 | int (*map_hw_event)(u64); /*method used to map | ||
| 70 | hw events */ | ||
| 71 | int (*map_cache_event)(u64); /*method used to map | ||
| 72 | cache events */ | ||
| 73 | |||
| 74 | u64 max_period; /* max sampling period */ | ||
| 75 | u64 cntval_mask; /* counter width mask */ | ||
| 76 | int cntval_bits; /* counter width */ | ||
| 77 | int max_events; /* max generic hw events | ||
| 78 | in map */ | ||
| 79 | int num_counters; /* number base + aux counters */ | ||
| 80 | int num_base_counters; /* number base counters */ | ||
| 81 | }; | ||
| 82 | |||
| 83 | DEFINE_PER_CPU(u64, perf_irqs); | ||
| 84 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
| 85 | |||
| 86 | #define TILE_OP_UNSUPP (-1) | ||
| 87 | |||
| 88 | #ifndef __tilegx__ | ||
| 89 | /* TILEPro hardware events map */ | ||
| 90 | static const int tile_hw_event_map[] = { | ||
| 91 | [PERF_COUNT_HW_CPU_CYCLES] = 0x01, /* ONE */ | ||
| 92 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x06, /* MP_BUNDLE_RETIRED */ | ||
| 93 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
| 94 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
| 95 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x16, /* | ||
| 96 | MP_CONDITIONAL_BRANCH_ISSUED */ | ||
| 97 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x14, /* | ||
| 98 | MP_CONDITIONAL_BRANCH_MISSPREDICT */ | ||
| 99 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
| 100 | }; | ||
| 101 | #else | ||
| 102 | /* TILEGx hardware events map */ | ||
| 103 | static const int tile_hw_event_map[] = { | ||
| 104 | [PERF_COUNT_HW_CPU_CYCLES] = 0x181, /* ONE */ | ||
| 105 | [PERF_COUNT_HW_INSTRUCTIONS] = 0xdb, /* INSTRUCTION_BUNDLE */ | ||
| 106 | [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, | ||
| 107 | [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, | ||
| 108 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xd9, /* | ||
| 109 | COND_BRANCH_PRED_CORRECT */ | ||
| 110 | [PERF_COUNT_HW_BRANCH_MISSES] = 0xda, /* | ||
| 111 | COND_BRANCH_PRED_INCORRECT */ | ||
| 112 | [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, | ||
| 113 | }; | ||
| 114 | #endif | ||
| 115 | |||
| 116 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 117 | |||
| 118 | /* | ||
| 119 | * Generalized hw caching related hw_event table, filled | ||
| 120 | * in on a per model basis. A value of -1 means | ||
| 121 | * 'not supported', any other value means the | ||
| 122 | * raw hw_event ID. | ||
| 123 | */ | ||
| 124 | #ifndef __tilegx__ | ||
| 125 | /* TILEPro hardware cache event map */ | ||
| 126 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
| 127 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 128 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
| 129 | [C(L1D)] = { | ||
| 130 | [C(OP_READ)] = { | ||
| 131 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 132 | [C(RESULT_MISS)] = 0x21, /* RD_MISS */ | ||
| 133 | }, | ||
| 134 | [C(OP_WRITE)] = { | ||
| 135 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 136 | [C(RESULT_MISS)] = 0x22, /* WR_MISS */ | ||
| 137 | }, | ||
| 138 | [C(OP_PREFETCH)] = { | ||
| 139 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 140 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 141 | }, | ||
| 142 | }, | ||
| 143 | [C(L1I)] = { | ||
| 144 | [C(OP_READ)] = { | ||
| 145 | [C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */ | ||
| 146 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 147 | }, | ||
| 148 | [C(OP_WRITE)] = { | ||
| 149 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 150 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 151 | }, | ||
| 152 | [C(OP_PREFETCH)] = { | ||
| 153 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 154 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 155 | }, | ||
| 156 | }, | ||
| 157 | [C(LL)] = { | ||
| 158 | [C(OP_READ)] = { | ||
| 159 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 160 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 161 | }, | ||
| 162 | [C(OP_WRITE)] = { | ||
| 163 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 164 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 165 | }, | ||
| 166 | [C(OP_PREFETCH)] = { | ||
| 167 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 168 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 169 | }, | ||
| 170 | }, | ||
| 171 | [C(DTLB)] = { | ||
| 172 | [C(OP_READ)] = { | ||
| 173 | [C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */ | ||
| 174 | [C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */ | ||
| 175 | }, | ||
| 176 | [C(OP_WRITE)] = { | ||
| 177 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 178 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 179 | }, | ||
| 180 | [C(OP_PREFETCH)] = { | ||
| 181 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 182 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 183 | }, | ||
| 184 | }, | ||
| 185 | [C(ITLB)] = { | ||
| 186 | [C(OP_READ)] = { | ||
| 187 | [C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */ | ||
| 188 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 189 | }, | ||
| 190 | [C(OP_WRITE)] = { | ||
| 191 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 192 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 193 | }, | ||
| 194 | [C(OP_PREFETCH)] = { | ||
| 195 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 196 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 197 | }, | ||
| 198 | }, | ||
| 199 | [C(BPU)] = { | ||
| 200 | [C(OP_READ)] = { | ||
| 201 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 202 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 203 | }, | ||
| 204 | [C(OP_WRITE)] = { | ||
| 205 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 206 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 207 | }, | ||
| 208 | [C(OP_PREFETCH)] = { | ||
| 209 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 210 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 211 | }, | ||
| 212 | }, | ||
| 213 | }; | ||
| 214 | #else | ||
| 215 | /* TILEGx hardware cache event map */ | ||
| 216 | static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] | ||
| 217 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 218 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
| 219 | [C(L1D)] = { | ||
| 220 | /* | ||
| 221 | * Like some other architectures (e.g. ARM), the performance | ||
| 222 | * counters don't differentiate between read and write | ||
| 223 | * accesses/misses, so this isn't strictly correct, but it's the | ||
| 224 | * best we can do. Writes and reads get combined. | ||
| 225 | */ | ||
| 226 | [C(OP_READ)] = { | ||
| 227 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 228 | [C(RESULT_MISS)] = 0x44, /* RD_MISS */ | ||
| 229 | }, | ||
| 230 | [C(OP_WRITE)] = { | ||
| 231 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 232 | [C(RESULT_MISS)] = 0x45, /* WR_MISS */ | ||
| 233 | }, | ||
| 234 | [C(OP_PREFETCH)] = { | ||
| 235 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 236 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 237 | }, | ||
| 238 | }, | ||
| 239 | [C(L1I)] = { | ||
| 240 | [C(OP_READ)] = { | ||
| 241 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 242 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 243 | }, | ||
| 244 | [C(OP_WRITE)] = { | ||
| 245 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 246 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 247 | }, | ||
| 248 | [C(OP_PREFETCH)] = { | ||
| 249 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 250 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 251 | }, | ||
| 252 | }, | ||
| 253 | [C(LL)] = { | ||
| 254 | [C(OP_READ)] = { | ||
| 255 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 256 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 257 | }, | ||
| 258 | [C(OP_WRITE)] = { | ||
| 259 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 260 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 261 | }, | ||
| 262 | [C(OP_PREFETCH)] = { | ||
| 263 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 264 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 265 | }, | ||
| 266 | }, | ||
| 267 | [C(DTLB)] = { | ||
| 268 | [C(OP_READ)] = { | ||
| 269 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
| 270 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
| 271 | }, | ||
| 272 | [C(OP_WRITE)] = { | ||
| 273 | [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ | ||
| 274 | [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ | ||
| 275 | }, | ||
| 276 | [C(OP_PREFETCH)] = { | ||
| 277 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 278 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 279 | }, | ||
| 280 | }, | ||
| 281 | [C(ITLB)] = { | ||
| 282 | [C(OP_READ)] = { | ||
| 283 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 284 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
| 285 | }, | ||
| 286 | [C(OP_WRITE)] = { | ||
| 287 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 288 | [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ | ||
| 289 | }, | ||
| 290 | [C(OP_PREFETCH)] = { | ||
| 291 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 292 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 293 | }, | ||
| 294 | }, | ||
| 295 | [C(BPU)] = { | ||
| 296 | [C(OP_READ)] = { | ||
| 297 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 298 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 299 | }, | ||
| 300 | [C(OP_WRITE)] = { | ||
| 301 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 302 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 303 | }, | ||
| 304 | [C(OP_PREFETCH)] = { | ||
| 305 | [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, | ||
| 306 | [C(RESULT_MISS)] = TILE_OP_UNSUPP, | ||
| 307 | }, | ||
| 308 | }, | ||
| 309 | }; | ||
| 310 | #endif | ||
| 311 | |||
| 312 | static atomic_t tile_active_events; | ||
| 313 | static DEFINE_MUTEX(perf_intr_reserve_mutex); | ||
| 314 | |||
| 315 | static int tile_map_hw_event(u64 config); | ||
| 316 | static int tile_map_cache_event(u64 config); | ||
| 317 | |||
| 318 | static int tile_pmu_handle_irq(struct pt_regs *regs, int fault); | ||
| 319 | |||
| 320 | /* | ||
| 321 | * To avoid new_raw_count getting larger than prev_raw_count | ||
| 322 | * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1. | ||
| 323 | */ | ||
| 324 | static const struct tile_pmu tilepmu = { | ||
| 325 | #ifndef __tilegx__ | ||
| 326 | .name = "tilepro", | ||
| 327 | #else | ||
| 328 | .name = "tilegx", | ||
| 329 | #endif | ||
| 330 | .max_events = ARRAY_SIZE(tile_hw_event_map), | ||
| 331 | .map_hw_event = tile_map_hw_event, | ||
| 332 | .hw_events = tile_hw_event_map, | ||
| 333 | .map_cache_event = tile_map_cache_event, | ||
| 334 | .cache_events = &tile_cache_event_map, | ||
| 335 | .cntval_bits = 32, | ||
| 336 | .cntval_mask = (1ULL << 32) - 1, | ||
| 337 | .max_period = (1ULL << 31) - 1, | ||
| 338 | .num_counters = TILE_MAX_COUNTERS, | ||
| 339 | .num_base_counters = TILE_BASE_COUNTERS, | ||
| 340 | }; | ||
| 341 | |||
| 342 | static const struct tile_pmu *tile_pmu __read_mostly; | ||
| 343 | |||
| 344 | /* | ||
| 345 | * Check whether perf event is enabled. | ||
| 346 | */ | ||
| 347 | int tile_perf_enabled(void) | ||
| 348 | { | ||
| 349 | return atomic_read(&tile_active_events) != 0; | ||
| 350 | } | ||
| 351 | |||
| 352 | /* | ||
| 353 | * Read Performance Counters. | ||
| 354 | */ | ||
| 355 | static inline u64 read_counter(int idx) | ||
| 356 | { | ||
| 357 | u64 val = 0; | ||
| 358 | |||
| 359 | /* __insn_mfspr() only takes an immediate argument */ | ||
| 360 | switch (idx) { | ||
| 361 | case PERF_COUNT_0_IDX: | ||
| 362 | val = __insn_mfspr(SPR_PERF_COUNT_0); | ||
| 363 | break; | ||
| 364 | case PERF_COUNT_1_IDX: | ||
| 365 | val = __insn_mfspr(SPR_PERF_COUNT_1); | ||
| 366 | break; | ||
| 367 | case AUX_PERF_COUNT_0_IDX: | ||
| 368 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_0); | ||
| 369 | break; | ||
| 370 | case AUX_PERF_COUNT_1_IDX: | ||
| 371 | val = __insn_mfspr(SPR_AUX_PERF_COUNT_1); | ||
| 372 | break; | ||
| 373 | default: | ||
| 374 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
| 375 | idx < PERF_COUNT_0_IDX); | ||
| 376 | } | ||
| 377 | |||
| 378 | return val; | ||
| 379 | } | ||
| 380 | |||
| 381 | /* | ||
| 382 | * Write Performance Counters. | ||
| 383 | */ | ||
| 384 | static inline void write_counter(int idx, u64 value) | ||
| 385 | { | ||
| 386 | /* __insn_mtspr() only takes an immediate argument */ | ||
| 387 | switch (idx) { | ||
| 388 | case PERF_COUNT_0_IDX: | ||
| 389 | __insn_mtspr(SPR_PERF_COUNT_0, value); | ||
| 390 | break; | ||
| 391 | case PERF_COUNT_1_IDX: | ||
| 392 | __insn_mtspr(SPR_PERF_COUNT_1, value); | ||
| 393 | break; | ||
| 394 | case AUX_PERF_COUNT_0_IDX: | ||
| 395 | __insn_mtspr(SPR_AUX_PERF_COUNT_0, value); | ||
| 396 | break; | ||
| 397 | case AUX_PERF_COUNT_1_IDX: | ||
| 398 | __insn_mtspr(SPR_AUX_PERF_COUNT_1, value); | ||
| 399 | break; | ||
| 400 | default: | ||
| 401 | WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || | ||
| 402 | idx < PERF_COUNT_0_IDX); | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Enable performance event by setting | ||
| 408 | * Performance Counter Control registers. | ||
| 409 | */ | ||
| 410 | static inline void tile_pmu_enable_event(struct perf_event *event) | ||
| 411 | { | ||
| 412 | struct hw_perf_event *hwc = &event->hw; | ||
| 413 | unsigned long cfg, mask; | ||
| 414 | int shift, idx = hwc->idx; | ||
| 415 | |||
| 416 | /* | ||
| 417 | * prevent early activation from tile_pmu_start() in hw_perf_enable | ||
| 418 | */ | ||
| 419 | |||
| 420 | if (WARN_ON_ONCE(idx == -1)) | ||
| 421 | return; | ||
| 422 | |||
| 423 | if (idx < tile_pmu->num_base_counters) | ||
| 424 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
| 425 | else | ||
| 426 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
| 427 | |||
| 428 | switch (idx) { | ||
| 429 | case PERF_COUNT_0_IDX: | ||
| 430 | case AUX_PERF_COUNT_0_IDX: | ||
| 431 | mask = TILE_EVENT_MASK; | ||
| 432 | shift = 0; | ||
| 433 | break; | ||
| 434 | case PERF_COUNT_1_IDX: | ||
| 435 | case AUX_PERF_COUNT_1_IDX: | ||
| 436 | mask = TILE_EVENT_MASK << 16; | ||
| 437 | shift = 16; | ||
| 438 | break; | ||
| 439 | default: | ||
| 440 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
| 441 | idx > AUX_PERF_COUNT_1_IDX); | ||
| 442 | return; | ||
| 443 | } | ||
| 444 | |||
| 445 | /* Clear mask bits to enable the event. */ | ||
| 446 | cfg &= ~mask; | ||
| 447 | cfg |= hwc->config << shift; | ||
| 448 | |||
| 449 | if (idx < tile_pmu->num_base_counters) | ||
| 450 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
| 451 | else | ||
| 452 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
| 453 | } | ||
| 454 | |||
| 455 | /* | ||
| 456 | * Disable performance event by clearing | ||
| 457 | * Performance Counter Control registers. | ||
| 458 | */ | ||
| 459 | static inline void tile_pmu_disable_event(struct perf_event *event) | ||
| 460 | { | ||
| 461 | struct hw_perf_event *hwc = &event->hw; | ||
| 462 | unsigned long cfg, mask; | ||
| 463 | int idx = hwc->idx; | ||
| 464 | |||
| 465 | if (idx == -1) | ||
| 466 | return; | ||
| 467 | |||
| 468 | if (idx < tile_pmu->num_base_counters) | ||
| 469 | cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); | ||
| 470 | else | ||
| 471 | cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); | ||
| 472 | |||
| 473 | switch (idx) { | ||
| 474 | case PERF_COUNT_0_IDX: | ||
| 475 | case AUX_PERF_COUNT_0_IDX: | ||
| 476 | mask = TILE_PLM_MASK; | ||
| 477 | break; | ||
| 478 | case PERF_COUNT_1_IDX: | ||
| 479 | case AUX_PERF_COUNT_1_IDX: | ||
| 480 | mask = TILE_PLM_MASK << 16; | ||
| 481 | break; | ||
| 482 | default: | ||
| 483 | WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || | ||
| 484 | idx > AUX_PERF_COUNT_1_IDX); | ||
| 485 | return; | ||
| 486 | } | ||
| 487 | |||
| 488 | /* Set mask bits to disable the event. */ | ||
| 489 | cfg |= mask; | ||
| 490 | |||
| 491 | if (idx < tile_pmu->num_base_counters) | ||
| 492 | __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); | ||
| 493 | else | ||
| 494 | __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); | ||
| 495 | } | ||
| 496 | |||
| 497 | /* | ||
| 498 | * Propagate event elapsed time into the generic event. | ||
| 499 | * Can only be executed on the CPU where the event is active. | ||
| 500 | * Returns the new raw counter value. | ||
| 501 | */ | ||
| 502 | static u64 tile_perf_event_update(struct perf_event *event) | ||
| 503 | { | ||
| 504 | struct hw_perf_event *hwc = &event->hw; | ||
| 505 | int shift = 64 - tile_pmu->cntval_bits; | ||
| 506 | u64 prev_raw_count, new_raw_count; | ||
| 507 | u64 oldval; | ||
| 508 | int idx = hwc->idx; | ||
| 509 | u64 delta; | ||
| 510 | |||
| 511 | /* | ||
| 512 | * Careful: an NMI might modify the previous event value. | ||
| 513 | * | ||
| 514 | * Our tactic to handle this is to first atomically read and | ||
| 515 | * exchange a new raw count - then add that new-prev delta | ||
| 516 | * count to the generic event atomically: | ||
| 517 | */ | ||
| 518 | again: | ||
| 519 | prev_raw_count = local64_read(&hwc->prev_count); | ||
| 520 | new_raw_count = read_counter(idx); | ||
| 521 | |||
| 522 | oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
| 523 | new_raw_count); | ||
| 524 | if (oldval != prev_raw_count) | ||
| 525 | goto again; | ||
| 526 | |||
| 527 | /* | ||
| 528 | * Now we have the new raw value and have updated the prev | ||
| 529 | * timestamp already. We can now calculate the elapsed delta | ||
| 530 | * (event-)time and add that to the generic event. | ||
| 531 | * | ||
| 532 | * Careful, not all hw sign-extends above the physical width | ||
| 533 | * of the count. | ||
| 534 | */ | ||
| 535 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
| 536 | delta >>= shift; | ||
| 537 | |||
| 538 | local64_add(delta, &event->count); | ||
| 539 | local64_sub(delta, &hwc->period_left); | ||
| 540 | |||
| 541 | return new_raw_count; | ||
| 542 | } | ||
| 543 | |||
| 544 | /* | ||
| 545 | * Set the next IRQ period, based on the hwc->period_left value. | ||
| 546 | * To be called with the event disabled in hw: | ||
| 547 | */ | ||
| 548 | static int tile_event_set_period(struct perf_event *event) | ||
| 549 | { | ||
| 550 | struct hw_perf_event *hwc = &event->hw; | ||
| 551 | int idx = hwc->idx; | ||
| 552 | s64 left = local64_read(&hwc->period_left); | ||
| 553 | s64 period = hwc->sample_period; | ||
| 554 | int ret = 0; | ||
| 555 | |||
| 556 | /* | ||
| 557 | * If we are way outside a reasonable range then just skip forward: | ||
| 558 | */ | ||
| 559 | if (unlikely(left <= -period)) { | ||
| 560 | left = period; | ||
| 561 | local64_set(&hwc->period_left, left); | ||
| 562 | hwc->last_period = period; | ||
| 563 | ret = 1; | ||
| 564 | } | ||
| 565 | |||
| 566 | if (unlikely(left <= 0)) { | ||
| 567 | left += period; | ||
| 568 | local64_set(&hwc->period_left, left); | ||
| 569 | hwc->last_period = period; | ||
| 570 | ret = 1; | ||
| 571 | } | ||
| 572 | if (left > tile_pmu->max_period) | ||
| 573 | left = tile_pmu->max_period; | ||
| 574 | |||
| 575 | /* | ||
| 576 | * The hw event starts counting from this event offset, | ||
| 577 | * mark it to be able to extract future deltas: | ||
| 578 | */ | ||
| 579 | local64_set(&hwc->prev_count, (u64)-left); | ||
| 580 | |||
| 581 | write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask); | ||
| 582 | |||
| 583 | perf_event_update_userpage(event); | ||
| 584 | |||
| 585 | return ret; | ||
| 586 | } | ||
| 587 | |||
| 588 | /* | ||
| 589 | * Stop the event but do not release the PMU counter | ||
| 590 | */ | ||
| 591 | static void tile_pmu_stop(struct perf_event *event, int flags) | ||
| 592 | { | ||
| 593 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 594 | struct hw_perf_event *hwc = &event->hw; | ||
| 595 | int idx = hwc->idx; | ||
| 596 | |||
| 597 | if (__test_and_clear_bit(idx, cpuc->active_mask)) { | ||
| 598 | tile_pmu_disable_event(event); | ||
| 599 | cpuc->events[hwc->idx] = NULL; | ||
| 600 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
| 601 | hwc->state |= PERF_HES_STOPPED; | ||
| 602 | } | ||
| 603 | |||
| 604 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
| 605 | /* | ||
| 606 | * Drain the remaining delta count out of an event | ||
| 607 | * that we are disabling: | ||
| 608 | */ | ||
| 609 | tile_perf_event_update(event); | ||
| 610 | hwc->state |= PERF_HES_UPTODATE; | ||
| 611 | } | ||
| 612 | } | ||
| 613 | |||
| 614 | /* | ||
| 615 | * Start an event (without re-assigning counter) | ||
| 616 | */ | ||
| 617 | static void tile_pmu_start(struct perf_event *event, int flags) | ||
| 618 | { | ||
| 619 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 620 | int idx = event->hw.idx; | ||
| 621 | |||
| 622 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
| 623 | return; | ||
| 624 | |||
| 625 | if (WARN_ON_ONCE(idx == -1)) | ||
| 626 | return; | ||
| 627 | |||
| 628 | if (flags & PERF_EF_RELOAD) { | ||
| 629 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); | ||
| 630 | tile_event_set_period(event); | ||
| 631 | } | ||
| 632 | |||
| 633 | event->hw.state = 0; | ||
| 634 | |||
| 635 | cpuc->events[idx] = event; | ||
| 636 | __set_bit(idx, cpuc->active_mask); | ||
| 637 | |||
| 638 | unmask_pmc_interrupts(); | ||
| 639 | |||
| 640 | tile_pmu_enable_event(event); | ||
| 641 | |||
| 642 | perf_event_update_userpage(event); | ||
| 643 | } | ||
| 644 | |||
| 645 | /* | ||
| 646 | * Add a single event to the PMU. | ||
| 647 | * | ||
| 648 | * The event is added to the group of enabled events | ||
| 649 | * but only if it can be scheduled with existing events. | ||
| 650 | */ | ||
| 651 | static int tile_pmu_add(struct perf_event *event, int flags) | ||
| 652 | { | ||
| 653 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 654 | struct hw_perf_event *hwc; | ||
| 655 | unsigned long mask; | ||
| 656 | int b, max_cnt; | ||
| 657 | |||
| 658 | hwc = &event->hw; | ||
| 659 | |||
| 660 | /* | ||
| 661 | * We are full. | ||
| 662 | */ | ||
| 663 | if (cpuc->n_events == tile_pmu->num_counters) | ||
| 664 | return -ENOSPC; | ||
| 665 | |||
| 666 | cpuc->event_list[cpuc->n_events] = event; | ||
| 667 | cpuc->n_events++; | ||
| 668 | |||
| 669 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
| 670 | if (!(flags & PERF_EF_START)) | ||
| 671 | hwc->state |= PERF_HES_ARCH; | ||
| 672 | |||
| 673 | /* | ||
| 674 | * Find first empty counter. | ||
| 675 | */ | ||
| 676 | max_cnt = tile_pmu->num_counters; | ||
| 677 | mask = ~cpuc->used_mask; | ||
| 678 | |||
| 679 | /* Find next free counter. */ | ||
| 680 | b = find_next_bit(&mask, max_cnt, 0); | ||
| 681 | |||
| 682 | /* Should not happen. */ | ||
| 683 | if (WARN_ON_ONCE(b == max_cnt)) | ||
| 684 | return -ENOSPC; | ||
| 685 | |||
| 686 | /* | ||
| 687 | * Assign counter to event. | ||
| 688 | */ | ||
| 689 | event->hw.idx = b; | ||
| 690 | __set_bit(b, &cpuc->used_mask); | ||
| 691 | |||
| 692 | /* | ||
| 693 | * Start if requested. | ||
| 694 | */ | ||
| 695 | if (flags & PERF_EF_START) | ||
| 696 | tile_pmu_start(event, PERF_EF_RELOAD); | ||
| 697 | |||
| 698 | return 0; | ||
| 699 | } | ||
| 700 | |||
| 701 | /* | ||
| 702 | * Delete a single event from the PMU. | ||
| 703 | * | ||
| 704 | * The event is deleted from the group of enabled events. | ||
| 705 | * If it is the last event, disable PMU interrupt. | ||
| 706 | */ | ||
| 707 | static void tile_pmu_del(struct perf_event *event, int flags) | ||
| 708 | { | ||
| 709 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 710 | int i; | ||
| 711 | |||
| 712 | /* | ||
| 713 | * Remove event from list, compact list if necessary. | ||
| 714 | */ | ||
| 715 | for (i = 0; i < cpuc->n_events; i++) { | ||
| 716 | if (cpuc->event_list[i] == event) { | ||
| 717 | while (++i < cpuc->n_events) | ||
| 718 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
| 719 | --cpuc->n_events; | ||
| 720 | cpuc->events[event->hw.idx] = NULL; | ||
| 721 | __clear_bit(event->hw.idx, &cpuc->used_mask); | ||
| 722 | tile_pmu_stop(event, PERF_EF_UPDATE); | ||
| 723 | break; | ||
| 724 | } | ||
| 725 | } | ||
| 726 | /* | ||
| 727 | * If there are no events left, then mask PMU interrupt. | ||
| 728 | */ | ||
| 729 | if (cpuc->n_events == 0) | ||
| 730 | mask_pmc_interrupts(); | ||
| 731 | perf_event_update_userpage(event); | ||
| 732 | } | ||
| 733 | |||
| 734 | /* | ||
| 735 | * Propagate event elapsed time into the event. | ||
| 736 | */ | ||
| 737 | static inline void tile_pmu_read(struct perf_event *event) | ||
| 738 | { | ||
| 739 | tile_perf_event_update(event); | ||
| 740 | } | ||
| 741 | |||
| 742 | /* | ||
| 743 | * Map generic events to Tile PMU. | ||
| 744 | */ | ||
| 745 | static int tile_map_hw_event(u64 config) | ||
| 746 | { | ||
| 747 | if (config >= tile_pmu->max_events) | ||
| 748 | return -EINVAL; | ||
| 749 | return tile_pmu->hw_events[config]; | ||
| 750 | } | ||
| 751 | |||
| 752 | /* | ||
| 753 | * Map generic hardware cache events to Tile PMU. | ||
| 754 | */ | ||
| 755 | static int tile_map_cache_event(u64 config) | ||
| 756 | { | ||
| 757 | unsigned int cache_type, cache_op, cache_result; | ||
| 758 | int code; | ||
| 759 | |||
| 760 | if (!tile_pmu->cache_events) | ||
| 761 | return -ENOENT; | ||
| 762 | |||
| 763 | cache_type = (config >> 0) & 0xff; | ||
| 764 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
| 765 | return -EINVAL; | ||
| 766 | |||
| 767 | cache_op = (config >> 8) & 0xff; | ||
| 768 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
| 769 | return -EINVAL; | ||
| 770 | |||
| 771 | cache_result = (config >> 16) & 0xff; | ||
| 772 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
| 773 | return -EINVAL; | ||
| 774 | |||
| 775 | code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result]; | ||
| 776 | if (code == TILE_OP_UNSUPP) | ||
| 777 | return -EINVAL; | ||
| 778 | |||
| 779 | return code; | ||
| 780 | } | ||
| 781 | |||
| 782 | static void tile_event_destroy(struct perf_event *event) | ||
| 783 | { | ||
| 784 | if (atomic_dec_return(&tile_active_events) == 0) | ||
| 785 | release_pmc_hardware(); | ||
| 786 | } | ||
| 787 | |||
| 788 | static int __tile_event_init(struct perf_event *event) | ||
| 789 | { | ||
| 790 | struct perf_event_attr *attr = &event->attr; | ||
| 791 | struct hw_perf_event *hwc = &event->hw; | ||
| 792 | int code; | ||
| 793 | |||
| 794 | switch (attr->type) { | ||
| 795 | case PERF_TYPE_HARDWARE: | ||
| 796 | code = tile_pmu->map_hw_event(attr->config); | ||
| 797 | break; | ||
| 798 | case PERF_TYPE_HW_CACHE: | ||
| 799 | code = tile_pmu->map_cache_event(attr->config); | ||
| 800 | break; | ||
| 801 | case PERF_TYPE_RAW: | ||
| 802 | code = attr->config & TILE_EVENT_MASK; | ||
| 803 | break; | ||
| 804 | default: | ||
| 805 | /* Should not happen. */ | ||
| 806 | return -EOPNOTSUPP; | ||
| 807 | } | ||
| 808 | |||
| 809 | if (code < 0) | ||
| 810 | return code; | ||
| 811 | |||
| 812 | hwc->config = code; | ||
| 813 | hwc->idx = -1; | ||
| 814 | |||
| 815 | if (attr->exclude_user) | ||
| 816 | hwc->config |= TILE_CTL_EXCL_USER; | ||
| 817 | |||
| 818 | if (attr->exclude_kernel) | ||
| 819 | hwc->config |= TILE_CTL_EXCL_KERNEL; | ||
| 820 | |||
| 821 | if (attr->exclude_hv) | ||
| 822 | hwc->config |= TILE_CTL_EXCL_HV; | ||
| 823 | |||
| 824 | if (!hwc->sample_period) { | ||
| 825 | hwc->sample_period = tile_pmu->max_period; | ||
| 826 | hwc->last_period = hwc->sample_period; | ||
| 827 | local64_set(&hwc->period_left, hwc->sample_period); | ||
| 828 | } | ||
| 829 | event->destroy = tile_event_destroy; | ||
| 830 | return 0; | ||
| 831 | } | ||
| 832 | |||
| 833 | static int tile_event_init(struct perf_event *event) | ||
| 834 | { | ||
| 835 | int err = 0; | ||
| 836 | perf_irq_t old_irq_handler = NULL; | ||
| 837 | |||
| 838 | if (atomic_inc_return(&tile_active_events) == 1) | ||
| 839 | old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq); | ||
| 840 | |||
| 841 | if (old_irq_handler) { | ||
| 842 | pr_warn("PMC hardware busy (reserved by oprofile)\n"); | ||
| 843 | |||
| 844 | atomic_dec(&tile_active_events); | ||
| 845 | return -EBUSY; | ||
| 846 | } | ||
| 847 | |||
| 848 | switch (event->attr.type) { | ||
| 849 | case PERF_TYPE_RAW: | ||
| 850 | case PERF_TYPE_HARDWARE: | ||
| 851 | case PERF_TYPE_HW_CACHE: | ||
| 852 | break; | ||
| 853 | |||
| 854 | default: | ||
| 855 | return -ENOENT; | ||
| 856 | } | ||
| 857 | |||
| 858 | err = __tile_event_init(event); | ||
| 859 | if (err) { | ||
| 860 | if (event->destroy) | ||
| 861 | event->destroy(event); | ||
| 862 | } | ||
| 863 | return err; | ||
| 864 | } | ||
| 865 | |||
| 866 | static struct pmu tilera_pmu = { | ||
| 867 | .event_init = tile_event_init, | ||
| 868 | .add = tile_pmu_add, | ||
| 869 | .del = tile_pmu_del, | ||
| 870 | |||
| 871 | .start = tile_pmu_start, | ||
| 872 | .stop = tile_pmu_stop, | ||
| 873 | |||
| 874 | .read = tile_pmu_read, | ||
| 875 | }; | ||
| 876 | |||
| 877 | /* | ||
| 878 | * PMU's IRQ handler, PMU has 2 interrupts, they share the same handler. | ||
| 879 | */ | ||
| 880 | int tile_pmu_handle_irq(struct pt_regs *regs, int fault) | ||
| 881 | { | ||
| 882 | struct perf_sample_data data; | ||
| 883 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 884 | struct perf_event *event; | ||
| 885 | struct hw_perf_event *hwc; | ||
| 886 | u64 val; | ||
| 887 | unsigned long status; | ||
| 888 | int bit; | ||
| 889 | |||
| 890 | __get_cpu_var(perf_irqs)++; | ||
| 891 | |||
| 892 | if (!atomic_read(&tile_active_events)) | ||
| 893 | return 0; | ||
| 894 | |||
| 895 | status = pmc_get_overflow(); | ||
| 896 | pmc_ack_overflow(status); | ||
| 897 | |||
| 898 | for_each_set_bit(bit, &status, tile_pmu->num_counters) { | ||
| 899 | |||
| 900 | event = cpuc->events[bit]; | ||
| 901 | |||
| 902 | if (!event) | ||
| 903 | continue; | ||
| 904 | |||
| 905 | if (!test_bit(bit, cpuc->active_mask)) | ||
| 906 | continue; | ||
| 907 | |||
| 908 | hwc = &event->hw; | ||
| 909 | |||
| 910 | val = tile_perf_event_update(event); | ||
| 911 | if (val & (1ULL << (tile_pmu->cntval_bits - 1))) | ||
| 912 | continue; | ||
| 913 | |||
| 914 | perf_sample_data_init(&data, 0, event->hw.last_period); | ||
| 915 | if (!tile_event_set_period(event)) | ||
| 916 | continue; | ||
| 917 | |||
| 918 | if (perf_event_overflow(event, &data, regs)) | ||
| 919 | tile_pmu_stop(event, 0); | ||
| 920 | } | ||
| 921 | |||
| 922 | return 0; | ||
| 923 | } | ||
| 924 | |||
| 925 | static bool __init supported_pmu(void) | ||
| 926 | { | ||
| 927 | tile_pmu = &tilepmu; | ||
| 928 | return true; | ||
| 929 | } | ||
| 930 | |||
| 931 | int __init init_hw_perf_events(void) | ||
| 932 | { | ||
| 933 | supported_pmu(); | ||
| 934 | perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW); | ||
| 935 | return 0; | ||
| 936 | } | ||
| 937 | arch_initcall(init_hw_perf_events); | ||
| 938 | |||
| 939 | /* Callchain handling code. */ | ||
| 940 | |||
| 941 | /* | ||
| 942 | * Tile specific backtracing code for perf_events. | ||
| 943 | */ | ||
| 944 | static inline void perf_callchain(struct perf_callchain_entry *entry, | ||
| 945 | struct pt_regs *regs) | ||
| 946 | { | ||
| 947 | struct KBacktraceIterator kbt; | ||
| 948 | unsigned int i; | ||
| 949 | |||
| 950 | /* | ||
| 951 | * Get the address just after the "jalr" instruction that | ||
| 952 | * jumps to the handler for a syscall. When we find this | ||
| 953 | * address in a backtrace, we silently ignore it, which gives | ||
| 954 | * us a one-step backtrace connection from the sys_xxx() | ||
| 955 | * function in the kernel to the xxx() function in libc. | ||
| 956 | * Otherwise, we lose the ability to properly attribute time | ||
| 957 | * from the libc calls to the kernel implementations, since | ||
| 958 | * oprofile only considers PCs from backtraces a pair at a time. | ||
| 959 | */ | ||
| 960 | unsigned long handle_syscall_pc = handle_syscall_link_address(); | ||
| 961 | |||
| 962 | KBacktraceIterator_init(&kbt, NULL, regs); | ||
| 963 | kbt.profile = 1; | ||
| 964 | |||
| 965 | /* | ||
| 966 | * The sample for the pc is already recorded. Now we are adding the | ||
| 967 | * address of the callsites on the stack. Our iterator starts | ||
| 968 | * with the frame of the (already sampled) call site. If our | ||
| 969 | * iterator contained a "return address" field, we could have just | ||
| 970 | * used it and wouldn't have needed to skip the first | ||
| 971 | * frame. That's in effect what the arm and x86 versions do. | ||
| 972 | * Instead we peel off the first iteration to get the equivalent | ||
| 973 | * behavior. | ||
| 974 | */ | ||
| 975 | |||
| 976 | if (KBacktraceIterator_end(&kbt)) | ||
| 977 | return; | ||
| 978 | KBacktraceIterator_next(&kbt); | ||
| 979 | |||
| 980 | /* | ||
| 981 | * Set stack depth to 16 for user and kernel space respectively, that | ||
| 982 | * is, total 32 stack frames. | ||
| 983 | */ | ||
| 984 | for (i = 0; i < 16; ++i) { | ||
| 985 | unsigned long pc; | ||
| 986 | if (KBacktraceIterator_end(&kbt)) | ||
| 987 | break; | ||
| 988 | pc = kbt.it.pc; | ||
| 989 | if (pc != handle_syscall_pc) | ||
| 990 | perf_callchain_store(entry, pc); | ||
| 991 | KBacktraceIterator_next(&kbt); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
/*
 * User-space callchain hook: handled by the common perf_callchain()
 * walker.
 */
void perf_callchain_user(struct perf_callchain_entry *entry,
			 struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
| 1000 | |||
/*
 * Kernel-space callchain hook: handled by the common perf_callchain()
 * walker.
 */
void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	perf_callchain(entry, regs);
}
