path: root/arch/sparc/kernel/perf_event.c
author      Ingo Molnar <mingo@elte.hu>   2009-09-21 06:02:48 -0400
committer   Ingo Molnar <mingo@elte.hu>   2009-09-21 08:28:04 -0400
commit      cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 (patch)
tree        81f98a3ab46c589792057fe2392c1e10f8ad7893 /arch/sparc/kernel/perf_event.c
parent      dfc65094d0313cc48969fa60bcf33d693aeb05a7 (diff)
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!

In the past few months the perfcounters subsystem has grown out of its
initial role of counting hardware events, and has become (and is
becoming) a much broader generic event enumeration, reporting, logging,
monitoring and analysis facility.

Naming its core object 'perf_counter' and naming the subsystem
'perfcounters' has become more and more of a misnomer. With pending
code like hw-breakpoints support, the 'counter' name is less and less
appropriate.

All in all, we've decided to rename the subsystem to 'performance
events' and to propagate this rename through all fields, variables and
API names (in an ABI-compatible fashion).

The word 'event' is also a bit shorter than 'counter', which makes it
slightly more convenient to write/handle as well.

Thanks go to Stephane Eranian, who first observed this misnomer and
suggested a rename.

User-space tooling and ABI compatibility are not affected - this patch
should be function-invariant. (Also, defconfigs were not touched to
keep the size down.)

This patch has been generated via the following script:

  FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')

  sed -i \
    -e 's/PERF_EVENT_/PERF_RECORD_/g' \
    -e 's/PERF_COUNTER/PERF_EVENT/g' \
    -e 's/perf_counter/perf_event/g' \
    -e 's/nb_counters/nb_events/g' \
    -e 's/swcounter/swevent/g' \
    -e 's/tpcounter_event/tp_event/g' \
    $FILES

  for N in $(find . -name perf_counter.[ch]); do
    M=$(echo $N | sed 's/perf_counter/perf_event/g')
    mv $N $M
  done

  FILES=$(find . -name perf_event.*)

  sed -i \
    -e 's/COUNTER_MASK/REG_MASK/g' \
    -e 's/COUNTER/EVENT/g' \
    -e 's/\<event\>/event_id/g' \
    -e 's/counter/event/g' \
    -e 's/Counter/Event/g' \
    $FILES

... to keep it as correct as possible. This script can also be used by
anyone who has pending perfcounters patches - it converts a Linux
kernel tree over to the new naming.

We tried to time this change to the point in time where the number of
pending patches is the smallest: the end of the merge window.

Namespace clashes were fixed up in a preparatory patch - and some
stylistic fallout will be fixed up in a subsequent patch.

( NOTE: 'counters' is still the proper terminology when we deal with
  hardware registers - and these sed scripts are a bit over-eager in
  renaming them. I've undone some of that, but in case there's
  something left where 'counter' would be better than 'event' we can
  undo that on an individual basis instead of touching an otherwise
  nicely automated patch. )

Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
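For illustration only (not part of the commit message): applying the first sed
pass above to a few typical pre-rename identifiers gives, mechanically,

    struct perf_counter *counter;   /* becomes: struct perf_event *counter; */
    PERF_COUNTER_IOC_ENABLE         /* becomes: PERF_EVENT_IOC_ENABLE       */
    PERF_EVENT_SAMPLE               /* becomes: PERF_RECORD_SAMPLE          */

The second pass runs only on the renamed perf_event.* files and additionally
turns bare 'counter' into 'event' and bare 'event' into 'event_id', which is
why the file below uses 'event_id' for its map-lookup parameters.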
Diffstat (limited to 'arch/sparc/kernel/perf_event.c')
-rw-r--r--   arch/sparc/kernel/perf_event.c   556
1 files changed, 556 insertions, 0 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
new file mode 100644
index 000000000000..2d6a1b10c81d
--- /dev/null
+++ b/arch/sparc/kernel/perf_event.c
@@ -0,0 +1,556 @@
/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register. The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits. But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters. It's thus nearly impossible to have one counter going
 * while keeping the other one stopped. Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events. These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */
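
/* Register layout as used by the code below: the PIC register packs
 * both 32-bit counts into a single 64-bit value (upper counter in bits
 * 63:32, lower counter in bits 31:0), while the PCR control register
 * carries one event-encoding field per counter (shifted by upper_shift
 * resp. lower_shift) plus the shared user/supervisor/hypervisor trace
 * enable bits and the chip-specific interrupt enable bits (irq_bit).
 */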

#define MAX_HWEVENTS        2
#define MAX_PERIOD          ((1UL << 32) - 1)

#define PIC_UPPER_INDEX     0
#define PIC_LOWER_INDEX     1

struct cpu_hw_events {
        struct perf_event *events[MAX_HWEVENTS];
        unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
        int enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

struct perf_event_map {
        u16 encoding;
        u8 pic_mask;
#define PIC_NONE   0x00
#define PIC_UPPER  0x01
#define PIC_LOWER  0x02
};

struct sparc_pmu {
        const struct perf_event_map *(*event_map)(int);
        int max_events;
        int upper_shift;
        int lower_shift;
        int event_mask;
        int hv_bit;
        int irq_bit;
        int upper_nop;
        int lower_nop;
};

static const struct perf_event_map ultra3i_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3i_event_map(int event_id)
{
        return &ultra3i_perfmon_event_map[event_id];
}

static const struct sparc_pmu ultra3i_pmu = {
        .event_map   = ultra3i_event_map,
        .max_events  = ARRAY_SIZE(ultra3i_perfmon_event_map),
        .upper_shift = 11,
        .lower_shift = 4,
        .event_mask  = 0x3f,
        .upper_nop   = 0x1c,
        .lower_nop   = 0x14,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
        return &niagara2_perfmon_event_map[event_id];
}

static const struct sparc_pmu niagara2_pmu = {
        .event_map   = niagara2_event_map,
        .max_events  = ARRAY_SIZE(niagara2_perfmon_event_map),
        .upper_shift = 19,
        .lower_shift = 6,
        .event_mask  = 0xfff,
        .hv_bit      = 0x8,
        .irq_bit     = 0x03,
        .upper_nop   = 0x220,
        .lower_nop   = 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event_id, int idx)
{
        if (idx == PIC_UPPER_INDEX)
                event_id <<= sparc_pmu->upper_shift;
        else
                event_id <<= sparc_pmu->lower_shift;
        return event_id;
}

static u64 mask_for_index(int idx)
{
        return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
        return event_encoding(idx == PIC_UPPER_INDEX ?
                              sparc_pmu->upper_nop :
                              sparc_pmu->lower_nop, idx);
}

static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc,
                                          int idx)
{
        u64 val, mask = mask_for_index(idx);

        val = pcr_ops->read();
        pcr_ops->write((val & ~mask) | hwc->config);
}

static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc,
                                           int idx)
{
        u64 mask = mask_for_index(idx);
        u64 nop = nop_for_index(idx);
        u64 val = pcr_ops->read();

        pcr_ops->write((val & ~mask) | nop);
}

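/* Enabling and disabling operate on the one shared PCR: enable OR's in
 * the config_base (context enable + irq) bits of every active event,
 * while disable clears the user/supervisor/hypervisor trace bits and
 * the interrupt bits for both counters at once.
 */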
void hw_perf_enable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;
        int i;

        if (cpuc->enabled)
                return;

        cpuc->enabled = 1;
        barrier();

        val = pcr_ops->read();

        for (i = 0; i < MAX_HWEVENTS; i++) {
                struct perf_event *cp = cpuc->events[i];
                struct hw_perf_event *hwc;

                if (!cp)
                        continue;
                hwc = &cp->hw;
                val |= hwc->config_base;
        }

        pcr_ops->write(val);
}

void hw_perf_disable(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;

        if (!cpuc->enabled)
                return;

        cpuc->enabled = 0;

        val = pcr_ops->read();
        val &= ~(PCR_UTRACE | PCR_STRACE |
                 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
        pcr_ops->write(val);
}

static u32 read_pmc(int idx)
{
        u64 val;

        read_pic(val);
        if (idx == PIC_UPPER_INDEX)
                val >>= 32;

        return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
        u64 shift, mask, pic;

        shift = 0;
        if (idx == PIC_UPPER_INDEX)
                shift = 32;

        mask = ((u64) 0xffffffff) << shift;
        val <<= shift;

        read_pic(pic);
        pic &= ~mask;
        pic |= val;
        write_pic(pic);
}

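/* Reprogram the counter so that it overflows after 'left' more events:
 * the hardware counters are only 32 bits wide, so the counter is loaded
 * with (u64)(-left) truncated to 32 bits and 'left' is clamped to
 * MAX_PERIOD. prev_count records the same starting value for the delta
 * computation in sparc_perf_event_update().
 */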
static int sparc_perf_event_set_period(struct perf_event *event,
                                       struct hw_perf_event *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;

        atomic64_set(&hwc->prev_count, (u64)-left);

        write_pmc(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}

static int sparc_pmu_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (test_and_set_bit(idx, cpuc->used_mask))
                return -EAGAIN;

        sparc_pmu_disable_event(hwc, idx);

        cpuc->events[idx] = event;
        set_bit(idx, cpuc->active_mask);

        sparc_perf_event_set_period(event, hwc, idx);
        sparc_pmu_enable_event(hwc, idx);
        perf_event_update_userpage(event);
        return 0;
}

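/* Fold the hardware count accumulated since prev_count into the event.
 * The cmpxchg loop guards against a racing update (e.g. from the NMI
 * handler), and the shift pair by (64 - 32) truncates the difference to
 * the 32-bit counter width, so a counter that wrapped past zero still
 * yields the right delta, assuming it wrapped at most once in between.
 */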
static u64 sparc_perf_event_update(struct perf_event *event,
                                   struct hw_perf_event *hwc, int idx)
{
        int shift = 64 - 32;
        u64 prev_raw_count, new_raw_count;
        s64 delta;

again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        atomic64_add(delta, &event->count);
        atomic64_sub(delta, &hwc->period_left);

        return new_raw_count;
}

static void sparc_pmu_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        clear_bit(idx, cpuc->active_mask);
        sparc_pmu_disable_event(hwc, idx);

        barrier();

        sparc_perf_event_update(event, hwc, idx);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        sparc_pmu_enable_event(hwc, hwc->idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

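/* The hardware counters are shared with the NMI watchdog: the first
 * event to be created stops the watchdog on every CPU, and the last
 * one to be destroyed (via perf_event_release_pmc) starts it again.
 */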
void perf_event_grab_pmc(void)
{
        if (atomic_inc_not_zero(&active_events))
                return;

        mutex_lock(&pmc_grab_mutex);
        if (atomic_read(&active_events) == 0) {
                if (atomic_read(&nmi_active) > 0) {
                        on_each_cpu(stop_nmi_watchdog, NULL, 1);
                        BUG_ON(atomic_read(&nmi_active) != 0);
                }
                atomic_inc(&active_events);
        }
        mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
                if (atomic_read(&nmi_active) == 0)
                        on_each_cpu(start_nmi_watchdog, NULL, 1);
                mutex_unlock(&pmc_grab_mutex);
        }
}

static void hw_perf_event_destroy(struct perf_event *event)
{
        perf_event_release_pmc();
}

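/* Validate and translate a generic hardware event: only PERF_TYPE_HARDWARE
 * event types are accepted, attr->config indexes the per-chip event_map,
 * and the counter slot (upper or lower PIC) is fixed here from pic_mask.
 */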
static int __hw_perf_event_init(struct perf_event *event)
{
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        const struct perf_event_map *pmap;
        u64 enc;

        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;

        if (attr->type != PERF_TYPE_HARDWARE)
                return -EOPNOTSUPP;

        if (attr->config >= sparc_pmu->max_events)
                return -EINVAL;

        perf_event_grab_pmc();
        event->destroy = hw_perf_event_destroy;

        /* We save the enable bits in the config_base. So to
         * turn off sampling just write 'config', and to enable
         * things write 'config | config_base'.
         */
        hwc->config_base = sparc_pmu->irq_bit;
        if (!attr->exclude_user)
                hwc->config_base |= PCR_UTRACE;
        if (!attr->exclude_kernel)
                hwc->config_base |= PCR_STRACE;
        if (!attr->exclude_hv)
                hwc->config_base |= sparc_pmu->hv_bit;

        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
        }

        pmap = sparc_pmu->event_map(attr->config);

        enc = pmap->encoding;
        if (pmap->pic_mask & PIC_UPPER) {
                hwc->idx = PIC_UPPER_INDEX;
                enc <<= sparc_pmu->upper_shift;
        } else {
                hwc->idx = PIC_LOWER_INDEX;
                enc <<= sparc_pmu->lower_shift;
        }

        hwc->config |= enc;
        return 0;
}

static const struct pmu pmu = {
        .enable     = sparc_pmu_enable,
        .disable    = sparc_pmu_disable,
        .read       = sparc_pmu_read,
        .unthrottle = sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
        int err = __hw_perf_event_init(event);

        if (err)
                return ERR_PTR(err);
        return &pmu;
}

void perf_event_print_debug(void)
{
        unsigned long flags;
        u64 pcr, pic;
        int cpu;

        if (!sparc_pmu)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();

        pcr = pcr_ops->read();
        read_pic(pic);

        pr_info("\n");
        pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
                cpu, pcr, pic);

        local_irq_restore(flags);
}

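/* Overflow NMI handler. Because both counters are always running (the
 * inactive one counts the NOP event), every active event is re-read
 * here; events whose 32-bit count still has bit 31 set are taken as
 * not having wrapped and are skipped. The rest get a new period and
 * are handed to perf_event_overflow(), which asks for the event to be
 * stopped when throttling kicks in.
 */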
static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                                            unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct pt_regs *regs;
        int idx;

        if (!atomic_read(&active_events))
                return NOTIFY_DONE;

        switch (cmd) {
        case DIE_NMI:
                break;

        default:
                return NOTIFY_DONE;
        }

        regs = args->regs;

        data.addr = 0;

        cpuc = &__get_cpu_var(cpu_hw_events);
        for (idx = 0; idx < MAX_HWEVENTS; idx++) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
                u64 val;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;
                hwc = &event->hw;
                val = sparc_perf_event_update(event, hwc, idx);
                if (val & (1ULL << 31))
                        continue;

                data.period = event->hw.last_period;
                if (!sparc_perf_event_set_period(event, hwc, idx))
                        continue;

                if (perf_event_overflow(event, 1, &data, regs))
                        sparc_pmu_disable_event(hwc, idx);
        }

        return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
        .notifier_call = perf_event_nmi_handler,
};

static bool __init supported_pmu(void)
{
        if (!strcmp(sparc_pmu_type, "ultra3i")) {
                sparc_pmu = &ultra3i_pmu;
                return true;
        }
        if (!strcmp(sparc_pmu_type, "niagara2")) {
                sparc_pmu = &niagara2_pmu;
                return true;
        }
        return false;
}

void __init init_hw_perf_events(void)
{
        pr_info("Performance events: ");

        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
                return;
        }

        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

        /* All sparc64 PMUs currently have 2 events. But this simple
         * driver only supports one active event at a time.
         */
        perf_max_events = 1;

        register_die_notifier(&perf_event_nmi_notifier);
}