-rw-r--r--  MAINTAINERS                       |   2
-rw-r--r--  arch/powerpc/include/asm/paca.h   |   2
-rw-r--r--  arch/powerpc/kernel/perf_event.c  |  12
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c  |  14
-rw-r--r--  include/linux/perf_counter.h      | 441
-rw-r--r--  include/linux/perf_event.h        |  98
-rw-r--r--  init/Kconfig                      |  37
-rw-r--r--  kernel/perf_event.c               |   4
8 files changed, 534 insertions, 76 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 43761a00e3f1..751a307dc44e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4000,7 +4000,7 @@ S: Maintained
 F:	include/linux/delayacct.h
 F:	kernel/delayacct.c

-PERFORMANCE COUNTER SUBSYSTEM
+PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 154f405b642f..7d8514ceceae 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -122,7 +122,7 @@ struct paca_struct {
 	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 hard_enabled;		/* set if irqs are enabled in MSR */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
-	u8 perf_event_pending;	/* PM interrupt while soft-disabled */
+	u8 perf_event_pending;		/* PM interrupt while soft-disabled */

 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index c98321fcb459..197b7d958796 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 struct power_pmu *ppmu;

 /*
- * Normally, to ignore kernel events we set the FCS (freeze events
+ * Normally, to ignore kernel events we set the FCS (freeze counters
  * in supervisor mode) bit in MMCR0, but if the kernel runs with the
  * hypervisor bit set in the MSR, or if we are running on a processor
  * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
@@ -159,7 +159,7 @@ void perf_event_print_debug(void)
 }

 /*
- * Read one performance monitor event (PMC).
+ * Read one performance monitor counter (PMC).
  */
 static unsigned long read_pmc(int idx)
 {
@@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event)
 		val = read_pmc(event->hw.idx);
 	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);

-	/* The events are only 32 bits wide */
+	/* The counters are only 32 bits wide */
 	delta = (val - prev) & 0xfffffffful;
 	atomic64_add(delta, &event->count);
 	atomic64_sub(delta, &event->hw.period_left);
@@ -543,7 +543,7 @@ void hw_perf_disable(void)
 	}

 	/*
-	 * Set the 'freeze events' bit.
+	 * Set the 'freeze counters' bit.
 	 * The barrier is to make sure the mtspr has been
 	 * executed and the PMU has frozen the events
 	 * before we return.
@@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 }

 /*
- * A event has overflowed; update its count and record
+ * A counter has overflowed; update its count and record
  * things if requested. Note that interrupts are hard-disabled
  * here so there is no possibility of being interrupted.
  */
@@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs)

 	/*
 	 * Reset MMCR0 to its normal value. This will set PMXE and
-	 * clear FC (freeze events) and PMAO (perf mon alert occurred)
+	 * clear FC (freeze counters) and PMAO (perf mon alert occurred)
 	 * and thus allow interrupts to occur again.
 	 * XXX might want to use MSR.PM to keep the events frozen until
 	 * we get back out of this interrupt.
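
The delta computation these powerpc hunks leave untouched is worth spelling out: because the PMCs are only 32 bits wide, the kernel computes (val - prev) & 0xfffffffful, which yields the correct increment even when the counter wraps between two readings. A minimal stand-alone sketch of the same idiom in plain user-space C (illustrative values only, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Accumulate a 32-bit PMC reading into a 64-bit total, tolerating wrap. */
static uint64_t pmc_delta(uint64_t prev, uint64_t val)
{
        /* Same idiom as the kernel: modular subtraction, masked to 32 bits. */
        return (val - prev) & 0xffffffffull;
}

int main(void)
{
        uint64_t count = 0;

        /* Counter wrapped: previous reading near the top, new reading small. */
        count += pmc_delta(0xfffffff0ull, 0x00000010ull);
        printf("delta across wrap: %llu\n", (unsigned long long)count); /* 32 */
        return 0;
}
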
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0d03629fb1a5..a3c7adb06b78 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2081,13 +2081,13 @@ void __init init_hw_perf_events(void)
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);

 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
-	pr_info("... generic events:         %d\n",     x86_pmu.num_events);
+	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
 	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
 	pr_info("... event mask:             %016Lx\n", perf_event_mask);
 }

 static inline void x86_pmu_read(struct perf_event *event)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 000000000000..368bd70f1d2d
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,441 @@
+/*
+ * NOTE: this file will be removed in a future kernel release, it is
+ * provided as a courtesy copy of user-space code that relies on the
+ * old (pre-rename) symbols and constants.
+ *
+ * Performance events:
+ *
+ *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ *    Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+	PERF_TYPE_HARDWARE			= 0,
+	PERF_TYPE_SOFTWARE			= 1,
+	PERF_TYPE_TRACEPOINT			= 2,
+	PERF_TYPE_HW_CACHE			= 3,
+	PERF_TYPE_RAW				= 4,
+
+	PERF_TYPE_MAX,				/* non-ABI */
+};
+
+/*
+ * Generalized performance counter event types, used by the
+ * attr.event_id parameter of the sys_perf_counter_open()
+ * syscall:
+ */
+enum perf_hw_id {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES		= 0,
+	PERF_COUNT_HW_INSTRUCTIONS		= 1,
+	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
+	PERF_COUNT_HW_CACHE_MISSES		= 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_HW_BRANCH_MISSES		= 5,
+	PERF_COUNT_HW_BUS_CYCLES		= 6,
+
+	PERF_COUNT_HW_MAX,			/* non-ABI */
+};
+
+/*
+ * Generalized hardware cache counters:
+ *
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ *       { read, write, prefetch } x
+ *       { accesses, misses }
+ */
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D			= 0,
+	PERF_COUNT_HW_CACHE_L1I			= 1,
+	PERF_COUNT_HW_CACHE_LL			= 2,
+	PERF_COUNT_HW_CACHE_DTLB		= 3,
+	PERF_COUNT_HW_CACHE_ITLB		= 4,
+	PERF_COUNT_HW_CACHE_BPU			= 5,
+
+	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ		= 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
+};
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+	PERF_COUNT_SW_CPU_CLOCK			= 0,
+	PERF_COUNT_SW_TASK_CLOCK		= 1,
+	PERF_COUNT_SW_PAGE_FAULTS		= 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
+
+	PERF_COUNT_SW_MAX,			/* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_sample_format {
+	PERF_SAMPLE_IP				= 1U << 0,
+	PERF_SAMPLE_TID				= 1U << 1,
+	PERF_SAMPLE_TIME			= 1U << 2,
+	PERF_SAMPLE_ADDR			= 1U << 3,
+	PERF_SAMPLE_READ			= 1U << 4,
+	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
+	PERF_SAMPLE_ID				= 1U << 6,
+	PERF_SAMPLE_CPU				= 1U << 7,
+	PERF_SAMPLE_PERIOD			= 1U << 8,
+	PERF_SAMPLE_STREAM_ID			= 1U << 9,
+	PERF_SAMPLE_RAW				= 1U << 10,
+
+	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+};
+
+/*
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ *	{ u64		value;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		id;           } && PERF_FORMAT_ID
+ *	} && !PERF_FORMAT_GROUP
+ *
+ *	{ u64		nr;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		value;
+ *	    { u64	id;           } && PERF_FORMAT_ID
+ *	  }		cntr[nr];
+ *	} && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_counter_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
+	PERF_FORMAT_ID				= 1U << 2,
+	PERF_FORMAT_GROUP			= 1U << 3,
+
+	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_attr {
+
+	/*
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32			type;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32			size;
+
+	/*
+	 * Type specific configuration information.
+	 */
+	__u64			config;
+
+	union {
+		__u64		sample_period;
+		__u64		sample_freq;
+	};
+
+	__u64			sample_type;
+	__u64			read_format;
+
+	__u64			disabled       :  1, /* off by default        */
+				inherit	       :  1, /* children inherit it   */
+				pinned	       :  1, /* must always be on PMU */
+				exclusive      :  1, /* only group on PMU     */
+				exclude_user   :  1, /* don't count user      */
+				exclude_kernel :  1, /* ditto kernel          */
+				exclude_hv     :  1, /* ditto hypervisor      */
+				exclude_idle   :  1, /* don't count when idle */
+				mmap           :  1, /* include mmap data     */
+				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
+				inherit_stat   :  1, /* per task counts       */
+				enable_on_exec :  1, /* next exec enables     */
+				task           :  1, /* trace fork/exit       */
+				watermark      :  1, /* wakeup_watermark      */
+
+				__reserved_1   : 49;
+
+	union {
+		__u32		wakeup_events;	  /* wakeup every n events */
+		__u32		wakeup_watermark; /* bytes before wakeup   */
+	};
+	__u32			__reserved_2;
+
+	__u64			__reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
+#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
+#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
+
+enum perf_counter_ioc_flags {
+	PERF_IOC_FLAG_GROUP		= 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw counters in user-space.
+	 *
+	 *   u32 seq;
+	 *   s64 count;
+	 *
+	 *   do {
+	 *     seq = pc->lock;
+	 *
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
+	 *
+	 *     barrier();
+	 *   } while (pc->lock != seq);
+	 *
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
+	 */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware counter identifier */
+	__s64	offset;			/* add to hardware counter value */
+	__u64	time_enabled;		/* time counter active */
+	__u64	time_running;		/* time counter on cpu */
+
+	/*
+	 * Hole for extension of the self monitor capabilities
+	 */
+
+	__u64	__reserved[123];	/* align to 1k */
+
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading the @data_head value should issue an rmb(), on
+	 * SMP capable platforms, after reading this value -- see
+	 * perf_counter_wakeup().
+	 *
+	 * When the mapping is PROT_WRITE the @data_tail value should be
+	 * written by userspace to reflect the last read data. In this case
+	 * the kernel will not over-write unread data.
+	 */
+	__u64	data_head;		/* head in the data section */
+	__u64	data_tail;		/* user-space written tail */
+};
+
+#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_EVENT_MISC_KERNEL			(1 << 0)
+#define PERF_EVENT_MISC_USER			(2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
+
+struct perf_event_header {
+	__u32	type;
+	__u16	misc;
+	__u16	size;
+};
+
+enum perf_event_type {
+
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
+	 * };
+	 */
+	PERF_EVENT_MMAP			= 1,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				id;
+	 *	u64				lost;
+	 * };
+	 */
+	PERF_EVENT_LOST			= 2,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	char				comm[];
+	 * };
+	 */
+	PERF_EVENT_COMM			= 3,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	u64				time;
+	 * };
+	 */
+	PERF_EVENT_EXIT			= 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				id;
+	 *	u64				stream_id;
+	 * };
+	 */
+	PERF_EVENT_THROTTLE		= 5,
+	PERF_EVENT_UNTHROTTLE		= 6,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	{ u64				time;     } && PERF_SAMPLE_TIME
+	 * };
+	 */
+	PERF_EVENT_FORK			= 7,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, tid;
+	 *
+	 *	struct read_format		values;
+	 * };
+	 */
+	PERF_EVENT_READ			= 8,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
+	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
+	 *	{ u64			time;     } && PERF_SAMPLE_TIME
+	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
+	 *	{ u64			id;	  } && PERF_SAMPLE_ID
+	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
+	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
+	 *
+	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
+	 *
+	 *	{ u64			nr,
+	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+	 *
+	 *	#
+	 *	# The RAW record below is opaque data wrt the ABI
+	 *	#
+	 *	# That is, the ABI doesn't make any promises wrt to
+	 *	# the stability of its content, it may vary depending
+	 *	# on event, hardware, kernel version and phase of
+	 *	# the moon.
+	 *	#
+	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
+	 *	#
+	 *
+	 *	{ u32			size;
+	 *	  char			data[size];}&& PERF_SAMPLE_RAW
+	 * };
+	 */
+	PERF_EVENT_SAMPLE		= 9,
+
+	PERF_EVENT_MAX,			/* non-ABI */
+};
+
+enum perf_callchain_context {
+	PERF_CONTEXT_HV			= (__u64)-32,
+	PERF_CONTEXT_KERNEL		= (__u64)-128,
+	PERF_CONTEXT_USER		= (__u64)-512,
+
+	PERF_CONTEXT_GUEST		= (__u64)-2048,
+	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
+	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
+
+	PERF_CONTEXT_MAX		= (__u64)-4095,
+};
+
+#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
+#define PERF_FLAG_FD_OUTPUT	(1U << 1)
+
+/*
+ * In case some app still references the old symbols:
+ */
+
+#define __NR_perf_counter_open	__NR_perf_event_open
+
+#define PR_TASK_PERF_COUNTERS_DISABLE	PR_TASK_PERF_EVENTS_DISABLE
+#define PR_TASK_PERF_COUNTERS_ENABLE	PR_TASK_PERF_EVENTS_ENABLE
+
+#endif /* _LINUX_PERF_COUNTER_H */
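
The courtesy header above only aliases the old PERF_COUNTER_* names onto the renamed interface; the underlying ABI is unchanged. As a rough illustration of that ABI, here is a hedged user-space sketch that counts its own instructions via the renamed syscall (there is no glibc wrapper for it, so the syscall is invoked directly; error handling kept minimal):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* No libc wrapper exists for this syscall; invoke it directly. */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.disabled = 1;              /* start off, enable explicitly */
        attr.exclude_kernel = 1;        /* count user-space only */

        fd = perf_event_open(&attr, 0 /* self */, -1 /* any CPU */, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under measurement would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        read(fd, &count, sizeof(count));  /* default read_format: one u64 */
        printf("instructions: %lld\n", count);
        close(fd);
        return 0;
}

The same program would still build against the compat header by swapping in the PERF_COUNTER_IOC_* names, which is exactly the point of the courtesy copy.
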
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ae9d9ed6df2a..acefaf71e6dd 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1,15 +1,15 @@
 /*
  * Performance events:
  *
  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
  *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
  *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
  *
  * Data type definitions, declarations, prototypes.
  *
  *    Started by: Thomas Gleixner and Ingo Molnar
  *
  * For licencing details see kernel-base/COPYING
  */
 #ifndef _LINUX_PERF_EVENT_H
 #define _LINUX_PERF_EVENT_H
@@ -131,19 +131,19 @@ enum perf_event_sample_format {
  * as specified by attr.read_format:
  *
  * struct read_format {
  *	{ u64		value;
  *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
  *	  { u64		time_running; } && PERF_FORMAT_RUNNING
  *	  { u64		id;           } && PERF_FORMAT_ID
  *	} && !PERF_FORMAT_GROUP
  *
  *	{ u64		nr;
  *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
  *	  { u64		time_running; } && PERF_FORMAT_RUNNING
  *	  { u64		value;
  *	    { u64	id;           } && PERF_FORMAT_ID
  *	  }		cntr[nr];
  *	} && PERF_FORMAT_GROUP
  * };
  */
 enum perf_event_read_format {
@@ -152,7 +152,7 @@ enum perf_event_read_format {
 	PERF_FORMAT_ID				= 1U << 2,
 	PERF_FORMAT_GROUP			= 1U << 3,

 	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
 };

 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
@@ -216,8 +216,8 @@ struct perf_event_attr {
  * Ioctls that can be done on a perf event fd:
  */
 #define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
 #define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
 #define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
@@ -314,9 +314,9 @@ enum perf_event_type {

 	/*
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u64				id;
 	 *	u64				lost;
 	 * };
 	 */
 	PERF_RECORD_LOST		= 2,
@@ -383,23 +383,23 @@ enum perf_event_type {
 	 *	{ u64			id;	  } && PERF_SAMPLE_ID
 	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
 	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
 	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
 	 *
 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
 	 *
 	 *	{ u64			nr,
 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
 	 *
 	 *	#
 	 *	# The RAW record below is opaque data wrt the ABI
 	 *	#
 	 *	# That is, the ABI doesn't make any promises wrt to
 	 *	# the stability of its content, it may vary depending
-	 *	# on event_id, hardware, kernel version and phase of
+	 *	# on event, hardware, kernel version and phase of
 	 *	# the moon.
 	 *	#
 	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
 	 *	#
 	 *
 	 *	{ u32			size;
 	 *	  char			data[size];}&& PERF_SAMPLE_RAW
@@ -503,10 +503,10 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
 	PERF_EVENT_STATE_ACTIVE		=  1,
 };

 struct file;
@@ -529,7 +529,7 @@ struct perf_mmap_data {

 	long				watermark;	/* wakeup watermark */

 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };

@@ -694,14 +694,14 @@ struct perf_cpu_context {
 };

 struct perf_output_handle {
 	struct perf_event		*event;
 	struct perf_mmap_data		*data;
 	unsigned long			head;
 	unsigned long			offset;
 	int				nmi;
 	int				sample;
 	int				locked;
 	unsigned long			flags;
 };

 #ifdef CONFIG_PERF_EVENTS
@@ -829,22 +829,22 @@ static inline void
 perf_event_task_sched_out(struct task_struct *task,
 			  struct task_struct *next, int cpu)		{ }
 static inline void
 perf_event_task_tick(struct task_struct *task, int cpu)		{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_do_pending(void)				{ }
 static inline void perf_event_print_debug(void)			{ }
 static inline void perf_disable(void)					{ }
 static inline void perf_enable(void)					{ }
 static inline int perf_event_task_disable(void)			{ return -EINVAL; }
 static inline int perf_event_task_enable(void)				{ return -EINVAL; }

 static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 	      struct pt_regs *regs, u64 addr)				{ }

 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
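
The !CONFIG_PERF_EVENTS stubs in the hunk above use a standard kernel idiom: empty static inline functions let call sites invoke the subsystem unconditionally, and the compiler removes the calls entirely when the feature is configured out. A toy sketch of the same pattern outside the kernel (CONFIG_FOO and foo_notify() are invented names for illustration):

#include <stdio.h>

/* Config-stub pattern; CONFIG_FOO stands in for a Kconfig symbol. */
#ifdef CONFIG_FOO
void foo_notify(int event);                     /* real implementation elsewhere */
#else
static inline void foo_notify(int event) { }    /* no-op when configured out */
#endif

int main(void)
{
        foo_notify(42);         /* call sites need no #ifdef clutter */
        printf("with CONFIG_FOO unset, foo_notify() compiles to nothing\n");
        return 0;
}
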
diff --git a/init/Kconfig b/init/Kconfig
index cfdf5c322806..706728be312f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -920,26 +920,31 @@ config HAVE_PERF_EVENTS
 	help
 	  See tools/perf/design.txt for details.

-menu "Performance Counters"
+menu "Kernel Performance Events And Counters"

 config PERF_EVENTS
-	bool "Kernel Performance Counters"
-	default y if PROFILING
+	bool "Kernel performance events and counters"
+	default y if (PROFILING || PERF_COUNTERS)
 	depends on HAVE_PERF_EVENTS
 	select ANON_INODES
 	help
-	  Enable kernel support for performance counter hardware.
+	  Enable kernel support for various performance events provided
+	  by software and hardware.

-	  Performance counters are special hardware registers available
-	  on most modern CPUs. These registers count the number of certain
+	  Software events are supported either built-in or via the
+	  use of generic tracepoints.
+
+	  Most modern CPUs support performance events via performance
+	  counter registers. These registers count the number of certain
 	  types of hw events: such as instructions executed, cachemisses
 	  suffered, or branches mis-predicted - without slowing down the
 	  kernel or applications. These registers can also trigger interrupts
 	  when a threshold number of events have passed - and can thus be
 	  used to profile the code that runs on that CPU.

-	  The Linux Performance Counter subsystem provides an abstraction of
-	  these hardware capabilities, available via a system call. It
+	  The Linux Performance Event subsystem provides an abstraction of
+	  these software and hardware event capabilities, available via a
+	  system call and used by the "perf" utility in tools/perf/. It
 	  provides per task and per CPU counters, and it provides event
 	  capabilities on top of those.
@@ -950,14 +955,26 @@ config EVENT_PROFILE
 	depends on PERF_EVENTS && EVENT_TRACING
 	default y
 	help
-	  Allow the use of tracepoints as software performance counters.
+	  Allow the use of tracepoints as software performance events.

-	  When this is enabled, you can create perf counters based on
+	  When this is enabled, you can create perf events based on
 	  tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
 	  found in debugfs://tracing/events/*/*/id. (The -e/--events
 	  option to the perf tool can parse and interpret symbolic
 	  tracepoints, in the subsystem:tracepoint_name format.)

+config PERF_COUNTERS
+	bool "Kernel performance counters (old config option)"
+	depends on HAVE_PERF_EVENTS
+	help
+	  This config has been obsoleted by the PERF_EVENTS
+	  config option - please see that one for details.
+
+	  It has no effect on the kernel whether you enable
+	  it or not, it is a compatibility placeholder.
+
+	  Say N if unsure.
+
 endmenu

 config VM_EVENT_COUNTERS
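
The EVENT_PROFILE help text above is effectively a recipe: read a tracepoint's numeric ID out of debugfs and pass it as attr.config with PERF_TYPE_TRACEPOINT. A speculative user-space sketch of that recipe (the sched_switch tracepoint and the /sys/kernel/debug mount point are example choices; the id file location follows the help text):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count, id;
        char buf[32];
        FILE *f;
        int fd;

        /* Example tracepoint; any entry under tracing/events/ works. */
        f = fopen("/sys/kernel/debug/tracing/events/sched/sched_switch/id", "r");
        if (!f || !fgets(buf, sizeof(buf), f)) {
                perror("tracepoint id");
                return 1;
        }
        id = atoll(buf);
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_TRACEPOINT;
        attr.size = sizeof(attr);
        attr.config = id;               /* the debugfs tracepoint ID */

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        sleep(1);                       /* let some context switches happen */
        read(fd, &count, sizeof(count));
        printf("sched_switch events: %lld\n", count);
        close(fd);
        return 0;
}
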
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6e8b99a04e1e..76ac4db405e9 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1,12 +1,12 @@
 /*
- * Performance event core code
+ * Performance events core code:
  *
  * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
  */

 #include <linux/fs.h>