diff options
-rw-r--r-- | MAINTAINERS | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/paca.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/perf_event.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 14 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 441 | ||||
-rw-r--r-- | include/linux/perf_event.h | 98 | ||||
-rw-r--r-- | init/Kconfig | 37 | ||||
-rw-r--r-- | kernel/perf_event.c | 4 |
8 files changed, 534 insertions, 76 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 43761a00e3f1..751a307dc44e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -4000,7 +4000,7 @@ S: Maintained | |||
4000 | F: include/linux/delayacct.h | 4000 | F: include/linux/delayacct.h |
4001 | F: kernel/delayacct.c | 4001 | F: kernel/delayacct.c |
4002 | 4002 | ||
4003 | PERFORMANCE COUNTER SUBSYSTEM | 4003 | PERFORMANCE EVENTS SUBSYSTEM |
4004 | M: Peter Zijlstra <a.p.zijlstra@chello.nl> | 4004 | M: Peter Zijlstra <a.p.zijlstra@chello.nl> |
4005 | M: Paul Mackerras <paulus@samba.org> | 4005 | M: Paul Mackerras <paulus@samba.org> |
4006 | M: Ingo Molnar <mingo@elte.hu> | 4006 | M: Ingo Molnar <mingo@elte.hu> |
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 154f405b642f..7d8514ceceae 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -122,7 +122,7 @@ struct paca_struct { | |||
122 | u8 soft_enabled; /* irq soft-enable flag */ | 122 | u8 soft_enabled; /* irq soft-enable flag */ |
123 | u8 hard_enabled; /* set if irqs are enabled in MSR */ | 123 | u8 hard_enabled; /* set if irqs are enabled in MSR */ |
124 | u8 io_sync; /* writel() needs spin_unlock sync */ | 124 | u8 io_sync; /* writel() needs spin_unlock sync */ |
125 | u8 perf_event_pending; /* PM interrupt while soft-disabled */ | 125 | u8 perf_event_pending; /* PM interrupt while soft-disabled */ |
126 | 126 | ||
127 | /* Stuff for accurate time accounting */ | 127 | /* Stuff for accurate time accounting */ |
128 | u64 user_time; /* accumulated usermode TB ticks */ | 128 | u64 user_time; /* accumulated usermode TB ticks */ |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c98321fcb459..197b7d958796 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c | |||
@@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | |||
41 | struct power_pmu *ppmu; | 41 | struct power_pmu *ppmu; |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Normally, to ignore kernel events we set the FCS (freeze events | 44 | * Normally, to ignore kernel events we set the FCS (freeze counters |
45 | * in supervisor mode) bit in MMCR0, but if the kernel runs with the | 45 | * in supervisor mode) bit in MMCR0, but if the kernel runs with the |
46 | * hypervisor bit set in the MSR, or if we are running on a processor | 46 | * hypervisor bit set in the MSR, or if we are running on a processor |
47 | * where the hypervisor bit is forced to 1 (as on Apple G5 processors), | 47 | * where the hypervisor bit is forced to 1 (as on Apple G5 processors), |
@@ -159,7 +159,7 @@ void perf_event_print_debug(void) | |||
159 | } | 159 | } |
160 | 160 | ||
161 | /* | 161 | /* |
162 | * Read one performance monitor event (PMC). | 162 | * Read one performance monitor counter (PMC). |
163 | */ | 163 | */ |
164 | static unsigned long read_pmc(int idx) | 164 | static unsigned long read_pmc(int idx) |
165 | { | 165 | { |
@@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event) | |||
409 | val = read_pmc(event->hw.idx); | 409 | val = read_pmc(event->hw.idx); |
410 | } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); | 410 | } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); |
411 | 411 | ||
412 | /* The events are only 32 bits wide */ | 412 | /* The counters are only 32 bits wide */ |
413 | delta = (val - prev) & 0xfffffffful; | 413 | delta = (val - prev) & 0xfffffffful; |
414 | atomic64_add(delta, &event->count); | 414 | atomic64_add(delta, &event->count); |
415 | atomic64_sub(delta, &event->hw.period_left); | 415 | atomic64_sub(delta, &event->hw.period_left); |
@@ -543,7 +543,7 @@ void hw_perf_disable(void) | |||
543 | } | 543 | } |
544 | 544 | ||
545 | /* | 545 | /* |
546 | * Set the 'freeze events' bit. | 546 | * Set the 'freeze counters' bit. |
547 | * The barrier is to make sure the mtspr has been | 547 | * The barrier is to make sure the mtspr has been |
548 | * executed and the PMU has frozen the events | 548 | * executed and the PMU has frozen the events |
549 | * before we return. | 549 | * before we return. |
@@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1124 | } | 1124 | } |
1125 | 1125 | ||
1126 | /* | 1126 | /* |
1127 | * A event has overflowed; update its count and record | 1127 | * A counter has overflowed; update its count and record |
1128 | * things if requested. Note that interrupts are hard-disabled | 1128 | * things if requested. Note that interrupts are hard-disabled |
1129 | * here so there is no possibility of being interrupted. | 1129 | * here so there is no possibility of being interrupted. |
1130 | */ | 1130 | */ |
@@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs) | |||
1271 | 1271 | ||
1272 | /* | 1272 | /* |
1273 | * Reset MMCR0 to its normal value. This will set PMXE and | 1273 | * Reset MMCR0 to its normal value. This will set PMXE and |
1274 | * clear FC (freeze events) and PMAO (perf mon alert occurred) | 1274 | * clear FC (freeze counters) and PMAO (perf mon alert occurred) |
1275 | * and thus allow interrupts to occur again. | 1275 | * and thus allow interrupts to occur again. |
1276 | * XXX might want to use MSR.PM to keep the events frozen until | 1276 | * XXX might want to use MSR.PM to keep the events frozen until |
1277 | * we get back out of this interrupt. | 1277 | * we get back out of this interrupt. |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0d03629fb1a5..a3c7adb06b78 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -2081,13 +2081,13 @@ void __init init_hw_perf_events(void) | |||
2081 | perf_events_lapic_init(); | 2081 | perf_events_lapic_init(); |
2082 | register_die_notifier(&perf_event_nmi_notifier); | 2082 | register_die_notifier(&perf_event_nmi_notifier); |
2083 | 2083 | ||
2084 | pr_info("... version: %d\n", x86_pmu.version); | 2084 | pr_info("... version: %d\n", x86_pmu.version); |
2085 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 2085 | pr_info("... bit width: %d\n", x86_pmu.event_bits); |
2086 | pr_info("... generic events: %d\n", x86_pmu.num_events); | 2086 | pr_info("... generic registers: %d\n", x86_pmu.num_events); |
2087 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 2087 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); |
2088 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 2088 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
2089 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 2089 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); |
2090 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 2090 | pr_info("... event mask: %016Lx\n", perf_event_mask); |
2091 | } | 2091 | } |
2092 | 2092 | ||
2093 | static inline void x86_pmu_read(struct perf_event *event) | 2093 | static inline void x86_pmu_read(struct perf_event *event) |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 000000000000..368bd70f1d2d --- /dev/null +++ b/include/linux/perf_counter.h | |||
@@ -0,0 +1,441 @@ | |||
1 | /* | ||
2 | * NOTE: this file will be removed in a future kernel release, it is | ||
3 | * provided as a courtesy copy of user-space code that relies on the | ||
4 | * old (pre-rename) symbols and constants. | ||
5 | * | ||
6 | * Performance events: | ||
7 | * | ||
8 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> | ||
9 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar | ||
10 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra | ||
11 | * | ||
12 | * Data type definitions, declarations, prototypes. | ||
13 | * | ||
14 | * Started by: Thomas Gleixner and Ingo Molnar | ||
15 | * | ||
16 | * For licencing details see kernel-base/COPYING | ||
17 | */ | ||
18 | #ifndef _LINUX_PERF_COUNTER_H | ||
19 | #define _LINUX_PERF_COUNTER_H | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | #include <linux/ioctl.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | |||
25 | /* | ||
26 | * User-space ABI bits: | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * attr.type | ||
31 | */ | ||
32 | enum perf_type_id { | ||
33 | PERF_TYPE_HARDWARE = 0, | ||
34 | PERF_TYPE_SOFTWARE = 1, | ||
35 | PERF_TYPE_TRACEPOINT = 2, | ||
36 | PERF_TYPE_HW_CACHE = 3, | ||
37 | PERF_TYPE_RAW = 4, | ||
38 | |||
39 | PERF_TYPE_MAX, /* non-ABI */ | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * Generalized performance counter event types, used by the | ||
44 | * attr.event_id parameter of the sys_perf_counter_open() | ||
45 | * syscall: | ||
46 | */ | ||
47 | enum perf_hw_id { | ||
48 | /* | ||
49 | * Common hardware events, generalized by the kernel: | ||
50 | */ | ||
51 | PERF_COUNT_HW_CPU_CYCLES = 0, | ||
52 | PERF_COUNT_HW_INSTRUCTIONS = 1, | ||
53 | PERF_COUNT_HW_CACHE_REFERENCES = 2, | ||
54 | PERF_COUNT_HW_CACHE_MISSES = 3, | ||
55 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, | ||
56 | PERF_COUNT_HW_BRANCH_MISSES = 5, | ||
57 | PERF_COUNT_HW_BUS_CYCLES = 6, | ||
58 | |||
59 | PERF_COUNT_HW_MAX, /* non-ABI */ | ||
60 | }; | ||
61 | |||
62 | /* | ||
63 | * Generalized hardware cache counters: | ||
64 | * | ||
65 | * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x | ||
66 | * { read, write, prefetch } x | ||
67 | * { accesses, misses } | ||
68 | */ | ||
69 | enum perf_hw_cache_id { | ||
70 | PERF_COUNT_HW_CACHE_L1D = 0, | ||
71 | PERF_COUNT_HW_CACHE_L1I = 1, | ||
72 | PERF_COUNT_HW_CACHE_LL = 2, | ||
73 | PERF_COUNT_HW_CACHE_DTLB = 3, | ||
74 | PERF_COUNT_HW_CACHE_ITLB = 4, | ||
75 | PERF_COUNT_HW_CACHE_BPU = 5, | ||
76 | |||
77 | PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ | ||
78 | }; | ||
79 | |||
80 | enum perf_hw_cache_op_id { | ||
81 | PERF_COUNT_HW_CACHE_OP_READ = 0, | ||
82 | PERF_COUNT_HW_CACHE_OP_WRITE = 1, | ||
83 | PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, | ||
84 | |||
85 | PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ | ||
86 | }; | ||
87 | |||
88 | enum perf_hw_cache_op_result_id { | ||
89 | PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, | ||
90 | PERF_COUNT_HW_CACHE_RESULT_MISS = 1, | ||
91 | |||
92 | PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * Special "software" counters provided by the kernel, even if the hardware | ||
97 | * does not support performance counters. These counters measure various | ||
98 | * physical and sw events of the kernel (and allow the profiling of them as | ||
99 | * well): | ||
100 | */ | ||
101 | enum perf_sw_ids { | ||
102 | PERF_COUNT_SW_CPU_CLOCK = 0, | ||
103 | PERF_COUNT_SW_TASK_CLOCK = 1, | ||
104 | PERF_COUNT_SW_PAGE_FAULTS = 2, | ||
105 | PERF_COUNT_SW_CONTEXT_SWITCHES = 3, | ||
106 | PERF_COUNT_SW_CPU_MIGRATIONS = 4, | ||
107 | PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, | ||
108 | PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, | ||
109 | |||
110 | PERF_COUNT_SW_MAX, /* non-ABI */ | ||
111 | }; | ||
112 | |||
113 | /* | ||
114 | * Bits that can be set in attr.sample_type to request information | ||
115 | * in the overflow packets. | ||
116 | */ | ||
117 | enum perf_counter_sample_format { | ||
118 | PERF_SAMPLE_IP = 1U << 0, | ||
119 | PERF_SAMPLE_TID = 1U << 1, | ||
120 | PERF_SAMPLE_TIME = 1U << 2, | ||
121 | PERF_SAMPLE_ADDR = 1U << 3, | ||
122 | PERF_SAMPLE_READ = 1U << 4, | ||
123 | PERF_SAMPLE_CALLCHAIN = 1U << 5, | ||
124 | PERF_SAMPLE_ID = 1U << 6, | ||
125 | PERF_SAMPLE_CPU = 1U << 7, | ||
126 | PERF_SAMPLE_PERIOD = 1U << 8, | ||
127 | PERF_SAMPLE_STREAM_ID = 1U << 9, | ||
128 | PERF_SAMPLE_RAW = 1U << 10, | ||
129 | |||
130 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * The format of the data returned by read() on a perf counter fd, | ||
135 | * as specified by attr.read_format: | ||
136 | * | ||
137 | * struct read_format { | ||
138 | * { u64 value; | ||
139 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED | ||
140 | * { u64 time_running; } && PERF_FORMAT_RUNNING | ||
141 | * { u64 id; } && PERF_FORMAT_ID | ||
142 | * } && !PERF_FORMAT_GROUP | ||
143 | * | ||
144 | * { u64 nr; | ||
145 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED | ||
146 | * { u64 time_running; } && PERF_FORMAT_RUNNING | ||
147 | * { u64 value; | ||
148 | * { u64 id; } && PERF_FORMAT_ID | ||
149 | * } cntr[nr]; | ||
150 | * } && PERF_FORMAT_GROUP | ||
151 | * }; | ||
152 | */ | ||
153 | enum perf_counter_read_format { | ||
154 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, | ||
155 | PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, | ||
156 | PERF_FORMAT_ID = 1U << 2, | ||
157 | PERF_FORMAT_GROUP = 1U << 3, | ||
158 | |||
159 | PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ | ||
160 | }; | ||
161 | |||
162 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | ||
163 | |||
164 | /* | ||
165 | * Hardware event to monitor via a performance monitoring counter: | ||
166 | */ | ||
167 | struct perf_counter_attr { | ||
168 | |||
169 | /* | ||
170 | * Major type: hardware/software/tracepoint/etc. | ||
171 | */ | ||
172 | __u32 type; | ||
173 | |||
174 | /* | ||
175 | * Size of the attr structure, for fwd/bwd compat. | ||
176 | */ | ||
177 | __u32 size; | ||
178 | |||
179 | /* | ||
180 | * Type specific configuration information. | ||
181 | */ | ||
182 | __u64 config; | ||
183 | |||
184 | union { | ||
185 | __u64 sample_period; | ||
186 | __u64 sample_freq; | ||
187 | }; | ||
188 | |||
189 | __u64 sample_type; | ||
190 | __u64 read_format; | ||
191 | |||
192 | __u64 disabled : 1, /* off by default */ | ||
193 | inherit : 1, /* children inherit it */ | ||
194 | pinned : 1, /* must always be on PMU */ | ||
195 | exclusive : 1, /* only group on PMU */ | ||
196 | exclude_user : 1, /* don't count user */ | ||
197 | exclude_kernel : 1, /* ditto kernel */ | ||
198 | exclude_hv : 1, /* ditto hypervisor */ | ||
199 | exclude_idle : 1, /* don't count when idle */ | ||
200 | mmap : 1, /* include mmap data */ | ||
201 | comm : 1, /* include comm data */ | ||
202 | freq : 1, /* use freq, not period */ | ||
203 | inherit_stat : 1, /* per task counts */ | ||
204 | enable_on_exec : 1, /* next exec enables */ | ||
205 | task : 1, /* trace fork/exit */ | ||
206 | watermark : 1, /* wakeup_watermark */ | ||
207 | |||
208 | __reserved_1 : 49; | ||
209 | |||
210 | union { | ||
211 | __u32 wakeup_events; /* wakeup every n events */ | ||
212 | __u32 wakeup_watermark; /* bytes before wakeup */ | ||
213 | }; | ||
214 | __u32 __reserved_2; | ||
215 | |||
216 | __u64 __reserved_3; | ||
217 | }; | ||
218 | |||
219 | /* | ||
220 | * Ioctls that can be done on a perf counter fd: | ||
221 | */ | ||
222 | #define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) | ||
223 | #define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) | ||
224 | #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) | ||
225 | #define PERF_COUNTER_IOC_RESET _IO ('$', 3) | ||
226 | #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) | ||
227 | #define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) | ||
228 | |||
229 | enum perf_counter_ioc_flags { | ||
230 | PERF_IOC_FLAG_GROUP = 1U << 0, | ||
231 | }; | ||
232 | |||
233 | /* | ||
234 | * Structure of the page that can be mapped via mmap | ||
235 | */ | ||
236 | struct perf_counter_mmap_page { | ||
237 | __u32 version; /* version number of this structure */ | ||
238 | __u32 compat_version; /* lowest version this is compat with */ | ||
239 | |||
240 | /* | ||
241 | * Bits needed to read the hw counters in user-space. | ||
242 | * | ||
243 | * u32 seq; | ||
244 | * s64 count; | ||
245 | * | ||
246 | * do { | ||
247 | * seq = pc->lock; | ||
248 | * | ||
249 | * barrier() | ||
250 | * if (pc->index) { | ||
251 | * count = pmc_read(pc->index - 1); | ||
252 | * count += pc->offset; | ||
253 | * } else | ||
254 | * goto regular_read; | ||
255 | * | ||
256 | * barrier(); | ||
257 | * } while (pc->lock != seq); | ||
258 | * | ||
259 | * NOTE: for obvious reasons this only works on self-monitoring | ||
260 | * processes. | ||
261 | */ | ||
262 | __u32 lock; /* seqlock for synchronization */ | ||
263 | __u32 index; /* hardware counter identifier */ | ||
264 | __s64 offset; /* add to hardware counter value */ | ||
265 | __u64 time_enabled; /* time counter active */ | ||
266 | __u64 time_running; /* time counter on cpu */ | ||
267 | |||
268 | /* | ||
269 | * Hole for extension of the self monitor capabilities | ||
270 | */ | ||
271 | |||
272 | __u64 __reserved[123]; /* align to 1k */ | ||
273 | |||
274 | /* | ||
275 | * Control data for the mmap() data buffer. | ||
276 | * | ||
277 | * User-space reading the @data_head value should issue an rmb(), on | ||
278 | * SMP capable platforms, after reading this value -- see | ||
279 | * perf_counter_wakeup(). | ||
280 | * | ||
281 | * When the mapping is PROT_WRITE the @data_tail value should be | ||
282 | * written by userspace to reflect the last read data. In this case | ||
283 | * the kernel will not over-write unread data. | ||
284 | */ | ||
285 | __u64 data_head; /* head in the data section */ | ||
286 | __u64 data_tail; /* user-space written tail */ | ||
287 | }; | ||
288 | |||
289 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) | ||
290 | #define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) | ||
291 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
292 | #define PERF_EVENT_MISC_USER (2 << 0) | ||
293 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) | ||
294 | |||
295 | struct perf_event_header { | ||
296 | __u32 type; | ||
297 | __u16 misc; | ||
298 | __u16 size; | ||
299 | }; | ||
300 | |||
301 | enum perf_event_type { | ||
302 | |||
303 | /* | ||
304 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
305 | * correlate userspace IPs to code. They have the following structure: | ||
306 | * | ||
307 | * struct { | ||
308 | * struct perf_event_header header; | ||
309 | * | ||
310 | * u32 pid, tid; | ||
311 | * u64 addr; | ||
312 | * u64 len; | ||
313 | * u64 pgoff; | ||
314 | * char filename[]; | ||
315 | * }; | ||
316 | */ | ||
317 | PERF_EVENT_MMAP = 1, | ||
318 | |||
319 | /* | ||
320 | * struct { | ||
321 | * struct perf_event_header header; | ||
322 | * u64 id; | ||
323 | * u64 lost; | ||
324 | * }; | ||
325 | */ | ||
326 | PERF_EVENT_LOST = 2, | ||
327 | |||
328 | /* | ||
329 | * struct { | ||
330 | * struct perf_event_header header; | ||
331 | * | ||
332 | * u32 pid, tid; | ||
333 | * char comm[]; | ||
334 | * }; | ||
335 | */ | ||
336 | PERF_EVENT_COMM = 3, | ||
337 | |||
338 | /* | ||
339 | * struct { | ||
340 | * struct perf_event_header header; | ||
341 | * u32 pid, ppid; | ||
342 | * u32 tid, ptid; | ||
343 | * u64 time; | ||
344 | * }; | ||
345 | */ | ||
346 | PERF_EVENT_EXIT = 4, | ||
347 | |||
348 | /* | ||
349 | * struct { | ||
350 | * struct perf_event_header header; | ||
351 | * u64 time; | ||
352 | * u64 id; | ||
353 | * u64 stream_id; | ||
354 | * }; | ||
355 | */ | ||
356 | PERF_EVENT_THROTTLE = 5, | ||
357 | PERF_EVENT_UNTHROTTLE = 6, | ||
358 | |||
359 | /* | ||
360 | * struct { | ||
361 | * struct perf_event_header header; | ||
362 | * u32 pid, ppid; | ||
363 | * u32 tid, ptid; | ||
364 | * { u64 time; } && PERF_SAMPLE_TIME | ||
365 | * }; | ||
366 | */ | ||
367 | PERF_EVENT_FORK = 7, | ||
368 | |||
369 | /* | ||
370 | * struct { | ||
371 | * struct perf_event_header header; | ||
372 | * u32 pid, tid; | ||
373 | * | ||
374 | * struct read_format values; | ||
375 | * }; | ||
376 | */ | ||
377 | PERF_EVENT_READ = 8, | ||
378 | |||
379 | /* | ||
380 | * struct { | ||
381 | * struct perf_event_header header; | ||
382 | * | ||
383 | * { u64 ip; } && PERF_SAMPLE_IP | ||
384 | * { u32 pid, tid; } && PERF_SAMPLE_TID | ||
385 | * { u64 time; } && PERF_SAMPLE_TIME | ||
386 | * { u64 addr; } && PERF_SAMPLE_ADDR | ||
387 | * { u64 id; } && PERF_SAMPLE_ID | ||
388 | * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID | ||
389 | * { u32 cpu, res; } && PERF_SAMPLE_CPU | ||
390 | * { u64 period; } && PERF_SAMPLE_PERIOD | ||
391 | * | ||
392 | * { struct read_format values; } && PERF_SAMPLE_READ | ||
393 | * | ||
394 | * { u64 nr, | ||
395 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN | ||
396 | * | ||
397 | * # | ||
398 | * # The RAW record below is opaque data wrt the ABI | ||
399 | * # | ||
400 | * # That is, the ABI doesn't make any promises wrt | ||
401 | * # the stability of its content, it may vary depending | ||
402 | * # on event, hardware, kernel version and phase of | ||
403 | * # the moon. | ||
404 | * # | ||
405 | * # In other words, PERF_SAMPLE_RAW contents are not an ABI. | ||
406 | * # | ||
407 | * | ||
408 | * { u32 size; | ||
409 | * char data[size];}&& PERF_SAMPLE_RAW | ||
410 | * }; | ||
411 | */ | ||
412 | PERF_EVENT_SAMPLE = 9, | ||
413 | |||
414 | PERF_EVENT_MAX, /* non-ABI */ | ||
415 | }; | ||
416 | |||
417 | enum perf_callchain_context { | ||
418 | PERF_CONTEXT_HV = (__u64)-32, | ||
419 | PERF_CONTEXT_KERNEL = (__u64)-128, | ||
420 | PERF_CONTEXT_USER = (__u64)-512, | ||
421 | |||
422 | PERF_CONTEXT_GUEST = (__u64)-2048, | ||
423 | PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, | ||
424 | PERF_CONTEXT_GUEST_USER = (__u64)-2560, | ||
425 | |||
426 | PERF_CONTEXT_MAX = (__u64)-4095, | ||
427 | }; | ||
428 | |||
429 | #define PERF_FLAG_FD_NO_GROUP (1U << 0) | ||
430 | #define PERF_FLAG_FD_OUTPUT (1U << 1) | ||
431 | |||
432 | /* | ||
433 | * In case some app still references the old symbols: | ||
434 | */ | ||
435 | |||
436 | #define __NR_perf_counter_open __NR_perf_event_open | ||
437 | |||
438 | #define PR_TASK_PERF_COUNTERS_DISABLE PR_TASK_PERF_EVENTS_DISABLE | ||
439 | #define PR_TASK_PERF_COUNTERS_ENABLE PR_TASK_PERF_EVENTS_ENABLE | ||
440 | |||
441 | #endif /* _LINUX_PERF_COUNTER_H */ | ||
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ae9d9ed6df2a..acefaf71e6dd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -1,15 +1,15 @@ | |||
1 | /* | 1 | /* |
2 | * Performance events: | 2 | * Performance events: |
3 | * | 3 | * |
4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra | 6 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra |
7 | * | 7 | * |
8 | * Data type definitions, declarations, prototypes. | 8 | * Data type definitions, declarations, prototypes. |
9 | * | 9 | * |
10 | * Started by: Thomas Gleixner and Ingo Molnar | 10 | * Started by: Thomas Gleixner and Ingo Molnar |
11 | * | 11 | * |
12 | * For licencing details see kernel-base/COPYING | 12 | * For licencing details see kernel-base/COPYING |
13 | */ | 13 | */ |
14 | #ifndef _LINUX_PERF_EVENT_H | 14 | #ifndef _LINUX_PERF_EVENT_H |
15 | #define _LINUX_PERF_EVENT_H | 15 | #define _LINUX_PERF_EVENT_H |
@@ -131,19 +131,19 @@ enum perf_event_sample_format { | |||
131 | * as specified by attr.read_format: | 131 | * as specified by attr.read_format: |
132 | * | 132 | * |
133 | * struct read_format { | 133 | * struct read_format { |
134 | * { u64 value; | 134 | * { u64 value; |
135 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED | 135 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED |
136 | * { u64 time_running; } && PERF_FORMAT_RUNNING | 136 | * { u64 time_running; } && PERF_FORMAT_RUNNING |
137 | * { u64 id; } && PERF_FORMAT_ID | 137 | * { u64 id; } && PERF_FORMAT_ID |
138 | * } && !PERF_FORMAT_GROUP | 138 | * } && !PERF_FORMAT_GROUP |
139 | * | 139 | * |
140 | * { u64 nr; | 140 | * { u64 nr; |
141 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED | 141 | * { u64 time_enabled; } && PERF_FORMAT_ENABLED |
142 | * { u64 time_running; } && PERF_FORMAT_RUNNING | 142 | * { u64 time_running; } && PERF_FORMAT_RUNNING |
143 | * { u64 value; | 143 | * { u64 value; |
144 | * { u64 id; } && PERF_FORMAT_ID | 144 | * { u64 id; } && PERF_FORMAT_ID |
145 | * } cntr[nr]; | 145 | * } cntr[nr]; |
146 | * } && PERF_FORMAT_GROUP | 146 | * } && PERF_FORMAT_GROUP |
147 | * }; | 147 | * }; |
148 | */ | 148 | */ |
149 | enum perf_event_read_format { | 149 | enum perf_event_read_format { |
@@ -152,7 +152,7 @@ enum perf_event_read_format { | |||
152 | PERF_FORMAT_ID = 1U << 2, | 152 | PERF_FORMAT_ID = 1U << 2, |
153 | PERF_FORMAT_GROUP = 1U << 3, | 153 | PERF_FORMAT_GROUP = 1U << 3, |
154 | 154 | ||
155 | PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ | 155 | PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ |
156 | }; | 156 | }; |
157 | 157 | ||
158 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | 158 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
@@ -216,8 +216,8 @@ struct perf_event_attr { | |||
216 | * Ioctls that can be done on a perf event fd: | 216 | * Ioctls that can be done on a perf event fd: |
217 | */ | 217 | */ |
218 | #define PERF_EVENT_IOC_ENABLE _IO ('$', 0) | 218 | #define PERF_EVENT_IOC_ENABLE _IO ('$', 0) |
219 | #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) | 219 | #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) |
220 | #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) | 220 | #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) |
221 | #define PERF_EVENT_IOC_RESET _IO ('$', 3) | 221 | #define PERF_EVENT_IOC_RESET _IO ('$', 3) |
222 | #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) | 222 | #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) |
223 | #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) | 223 | #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) |
@@ -314,9 +314,9 @@ enum perf_event_type { | |||
314 | 314 | ||
315 | /* | 315 | /* |
316 | * struct { | 316 | * struct { |
317 | * struct perf_event_header header; | 317 | * struct perf_event_header header; |
318 | * u64 id; | 318 | * u64 id; |
319 | * u64 lost; | 319 | * u64 lost; |
320 | * }; | 320 | * }; |
321 | */ | 321 | */ |
322 | PERF_RECORD_LOST = 2, | 322 | PERF_RECORD_LOST = 2, |
@@ -383,23 +383,23 @@ enum perf_event_type { | |||
383 | * { u64 id; } && PERF_SAMPLE_ID | 383 | * { u64 id; } && PERF_SAMPLE_ID |
384 | * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID | 384 | * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID |
385 | * { u32 cpu, res; } && PERF_SAMPLE_CPU | 385 | * { u32 cpu, res; } && PERF_SAMPLE_CPU |
386 | * { u64 period; } && PERF_SAMPLE_PERIOD | 386 | * { u64 period; } && PERF_SAMPLE_PERIOD |
387 | * | 387 | * |
388 | * { struct read_format values; } && PERF_SAMPLE_READ | 388 | * { struct read_format values; } && PERF_SAMPLE_READ |
389 | * | 389 | * |
390 | * { u64 nr, | 390 | * { u64 nr, |
391 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN | 391 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
392 | * | 392 | * |
393 | * # | 393 | * # |
394 | * # The RAW record below is opaque data wrt the ABI | 394 | * # The RAW record below is opaque data wrt the ABI |
395 | * # | 395 | * # |
396 | * # That is, the ABI doesn't make any promises wrt | 396 | * # That is, the ABI doesn't make any promises wrt |
397 | * # the stability of its content, it may vary depending | 397 | * # the stability of its content, it may vary depending |
398 | * # on event_id, hardware, kernel version and phase of | 398 | * # on event, hardware, kernel version and phase of |
399 | * # the moon. | 399 | * # the moon. |
400 | * # | 400 | * # |
401 | * # In other words, PERF_SAMPLE_RAW contents are not an ABI. | 401 | * # In other words, PERF_SAMPLE_RAW contents are not an ABI. |
402 | * # | 402 | * # |
403 | * | 403 | * |
404 | * { u32 size; | 404 | * { u32 size; |
405 | * char data[size];}&& PERF_SAMPLE_RAW | 405 | * char data[size];}&& PERF_SAMPLE_RAW |
@@ -503,10 +503,10 @@ struct pmu { | |||
503 | * enum perf_event_active_state - the states of an event | 503 | * enum perf_event_active_state - the states of an event |
504 | */ | 504 | */ |
505 | enum perf_event_active_state { | 505 | enum perf_event_active_state { |
506 | PERF_EVENT_STATE_ERROR = -2, | 506 | PERF_EVENT_STATE_ERROR = -2, |
507 | PERF_EVENT_STATE_OFF = -1, | 507 | PERF_EVENT_STATE_OFF = -1, |
508 | PERF_EVENT_STATE_INACTIVE = 0, | 508 | PERF_EVENT_STATE_INACTIVE = 0, |
509 | PERF_EVENT_STATE_ACTIVE = 1, | 509 | PERF_EVENT_STATE_ACTIVE = 1, |
510 | }; | 510 | }; |
511 | 511 | ||
512 | struct file; | 512 | struct file; |
@@ -529,7 +529,7 @@ struct perf_mmap_data { | |||
529 | 529 | ||
530 | long watermark; /* wakeup watermark */ | 530 | long watermark; /* wakeup watermark */ |
531 | 531 | ||
532 | struct perf_event_mmap_page *user_page; | 532 | struct perf_event_mmap_page *user_page; |
533 | void *data_pages[0]; | 533 | void *data_pages[0]; |
534 | }; | 534 | }; |
535 | 535 | ||
@@ -694,14 +694,14 @@ struct perf_cpu_context { | |||
694 | }; | 694 | }; |
695 | 695 | ||
696 | struct perf_output_handle { | 696 | struct perf_output_handle { |
697 | struct perf_event *event; | 697 | struct perf_event *event; |
698 | struct perf_mmap_data *data; | 698 | struct perf_mmap_data *data; |
699 | unsigned long head; | 699 | unsigned long head; |
700 | unsigned long offset; | 700 | unsigned long offset; |
701 | int nmi; | 701 | int nmi; |
702 | int sample; | 702 | int sample; |
703 | int locked; | 703 | int locked; |
704 | unsigned long flags; | 704 | unsigned long flags; |
705 | }; | 705 | }; |
706 | 706 | ||
707 | #ifdef CONFIG_PERF_EVENTS | 707 | #ifdef CONFIG_PERF_EVENTS |
@@ -829,22 +829,22 @@ static inline void | |||
829 | perf_event_task_sched_out(struct task_struct *task, | 829 | perf_event_task_sched_out(struct task_struct *task, |
830 | struct task_struct *next, int cpu) { } | 830 | struct task_struct *next, int cpu) { } |
831 | static inline void | 831 | static inline void |
832 | perf_event_task_tick(struct task_struct *task, int cpu) { } | 832 | perf_event_task_tick(struct task_struct *task, int cpu) { } |
833 | static inline int perf_event_init_task(struct task_struct *child) { return 0; } | 833 | static inline int perf_event_init_task(struct task_struct *child) { return 0; } |
834 | static inline void perf_event_exit_task(struct task_struct *child) { } | 834 | static inline void perf_event_exit_task(struct task_struct *child) { } |
835 | static inline void perf_event_free_task(struct task_struct *task) { } | 835 | static inline void perf_event_free_task(struct task_struct *task) { } |
836 | static inline void perf_event_do_pending(void) { } | 836 | static inline void perf_event_do_pending(void) { } |
837 | static inline void perf_event_print_debug(void) { } | 837 | static inline void perf_event_print_debug(void) { } |
838 | static inline void perf_disable(void) { } | 838 | static inline void perf_disable(void) { } |
839 | static inline void perf_enable(void) { } | 839 | static inline void perf_enable(void) { } |
840 | static inline int perf_event_task_disable(void) { return -EINVAL; } | 840 | static inline int perf_event_task_disable(void) { return -EINVAL; } |
841 | static inline int perf_event_task_enable(void) { return -EINVAL; } | 841 | static inline int perf_event_task_enable(void) { return -EINVAL; } |
842 | 842 | ||
843 | static inline void | 843 | static inline void |
844 | perf_sw_event(u32 event_id, u64 nr, int nmi, | 844 | perf_sw_event(u32 event_id, u64 nr, int nmi, |
845 | struct pt_regs *regs, u64 addr) { } | 845 | struct pt_regs *regs, u64 addr) { } |
846 | 846 | ||
847 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } | 847 | static inline void perf_event_mmap(struct vm_area_struct *vma) { } |
848 | static inline void perf_event_comm(struct task_struct *tsk) { } | 848 | static inline void perf_event_comm(struct task_struct *tsk) { } |
849 | static inline void perf_event_fork(struct task_struct *tsk) { } | 849 | static inline void perf_event_fork(struct task_struct *tsk) { } |
850 | static inline void perf_event_init(void) { } | 850 | static inline void perf_event_init(void) { } |
diff --git a/init/Kconfig b/init/Kconfig index cfdf5c322806..706728be312f 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -920,26 +920,31 @@ config HAVE_PERF_EVENTS | |||
920 | help | 920 | help |
921 | See tools/perf/design.txt for details. | 921 | See tools/perf/design.txt for details. |
922 | 922 | ||
923 | menu "Performance Counters" | 923 | menu "Kernel Performance Events And Counters" |
924 | 924 | ||
925 | config PERF_EVENTS | 925 | config PERF_EVENTS |
926 | bool "Kernel Performance Counters" | 926 | bool "Kernel performance events and counters" |
927 | default y if PROFILING | 927 | default y if (PROFILING || PERF_COUNTERS) |
928 | depends on HAVE_PERF_EVENTS | 928 | depends on HAVE_PERF_EVENTS |
929 | select ANON_INODES | 929 | select ANON_INODES |
930 | help | 930 | help |
931 | Enable kernel support for performance counter hardware. | 931 | Enable kernel support for various performance events provided |
932 | by software and hardware. | ||
932 | 933 | ||
933 | Performance counters are special hardware registers available | 934 | Software events are supported either built-in or via the |
934 | on most modern CPUs. These registers count the number of certain | 935 | use of generic tracepoints. |
936 | |||
937 | Most modern CPUs support performance events via performance | ||
938 | counter registers. These registers count the number of certain | ||
935 | types of hw events: such as instructions executed, cachemisses | 939 | types of hw events: such as instructions executed, cachemisses |
936 | suffered, or branches mis-predicted - without slowing down the | 940 | suffered, or branches mis-predicted - without slowing down the |
937 | kernel or applications. These registers can also trigger interrupts | 941 | kernel or applications. These registers can also trigger interrupts |
938 | when a threshold number of events have passed - and can thus be | 942 | when a threshold number of events have passed - and can thus be |
939 | used to profile the code that runs on that CPU. | 943 | used to profile the code that runs on that CPU. |
940 | 944 | ||
941 | The Linux Performance Counter subsystem provides an abstraction of | 945 | The Linux Performance Event subsystem provides an abstraction of |
942 | these hardware capabilities, available via a system call. It | 946 | these software and hardware event capabilities, available via a |
947 | system call and used by the "perf" utility in tools/perf/. It | ||
943 | provides per task and per CPU counters, and it provides event | 948 | provides per task and per CPU counters, and it provides event |
944 | capabilities on top of those. | 949 | capabilities on top of those. |
945 | 950 | ||
@@ -950,14 +955,26 @@ config EVENT_PROFILE | |||
950 | depends on PERF_EVENTS && EVENT_TRACING | 955 | depends on PERF_EVENTS && EVENT_TRACING |
951 | default y | 956 | default y |
952 | help | 957 | help |
953 | Allow the use of tracepoints as software performance counters. | 958 | Allow the use of tracepoints as software performance events. |
954 | 959 | ||
955 | When this is enabled, you can create perf counters based on | 960 | When this is enabled, you can create perf events based on |
956 | tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID | 961 | tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID |
957 | found in debugfs://tracing/events/*/*/id. (The -e/--events | 962 | found in debugfs://tracing/events/*/*/id. (The -e/--events |
958 | option to the perf tool can parse and interpret symbolic | 963 | option to the perf tool can parse and interpret symbolic |
959 | tracepoints, in the subsystem:tracepoint_name format.) | 964 | tracepoints, in the subsystem:tracepoint_name format.) |
960 | 965 | ||
966 | config PERF_COUNTERS | ||
967 | bool "Kernel performance counters (old config option)" | ||
968 | depends on HAVE_PERF_EVENTS | ||
969 | help | ||
970 | This config has been obsoleted by the PERF_EVENTS | ||
971 | config option - please see that one for details. | ||
972 | |||
973 | It has no effect on the kernel whether you enable | ||
974 | it or not, it is a compatibility placeholder. | ||
975 | |||
976 | Say N if unsure. | ||
977 | |||
961 | endmenu | 978 | endmenu |
962 | 979 | ||
963 | config VM_EVENT_COUNTERS | 980 | config VM_EVENT_COUNTERS |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 6e8b99a04e1e..76ac4db405e9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -1,12 +1,12 @@ | |||
1 | /* | 1 | /* |
2 | * Performance event core code | 2 | * Performance events core code: |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 6 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
8 | * | 8 | * |
9 | * For licensing details see kernel-base/COPYING | 9 | * For licensing details see kernel-base/COPYING |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |