diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-11 17:01:07 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-11 17:01:07 -0400 |
commit | 8a1ca8cedd108c8e76a6ab34079d0bbb4f244799 (patch) | |
tree | 636c715524f1718599209cc289908ea44b6cb859 /include | |
parent | b640f042faa2a2fad6464f259a8afec06e2f6386 (diff) | |
parent | 940010c5a314a7bd9b498593bc6ba1718ac5aec5 (diff) |
Merge branch 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (574 commits)
perf_counter: Turn off by default
perf_counter: Add counter->id to the throttle event
perf_counter: Better align code
perf_counter: Rename L2 to LL cache
perf_counter: Standardize event names
perf_counter: Rename enums
perf_counter tools: Clean up u64 usage
perf_counter: Rename perf_counter_limit sysctl
perf_counter: More paranoia settings
perf_counter: powerpc: Implement generalized cache events for POWER processors
perf_counters: powerpc: Add support for POWER7 processors
perf_counter: Accurate period data
perf_counter: Introduce struct for sample data
perf_counter tools: Normalize data using per sample period data
perf_counter: Annotate exit ctx recursion
perf_counter tools: Propagate signals properly
perf_counter tools: Small frequency related fixes
perf_counter: More aggressive frequency adjustment
perf_counter/x86: Fix the model number of Intel Core2 processors
perf_counter, x86: Correct some event and umask values for Intel processors
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/atomic.h | 2 | ||||
-rw-r--r-- | include/linux/init_task.h | 10 | ||||
-rw-r--r-- | include/linux/kernel_stat.h | 5 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 697 | ||||
-rw-r--r-- | include/linux/prctl.h | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 21 | ||||
-rw-r--r-- | include/linux/syscalls.h | 5 |
7 files changed, 741 insertions, 2 deletions
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 3673a13b6703..81d3be459efb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h | |||
@@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) | |||
134 | #define atomic_long_cmpxchg(l, old, new) \ | 134 | #define atomic_long_cmpxchg(l, old, new) \ |
135 | (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) | 135 | (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) |
136 | #define atomic_long_xchg(v, new) \ | 136 | #define atomic_long_xchg(v, new) \ |
137 | (atomic64_xchg((atomic64_t *)(l), (new))) | 137 | (atomic64_xchg((atomic64_t *)(v), (new))) |
138 | 138 | ||
139 | #else /* BITS_PER_LONG == 64 */ | 139 | #else /* BITS_PER_LONG == 64 */ |
140 | 140 | ||
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6646bfc7b892..28b1f30601b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -108,6 +108,15 @@ extern struct group_info init_groups; | |||
108 | 108 | ||
109 | extern struct cred init_cred; | 109 | extern struct cred init_cred; |
110 | 110 | ||
111 | #ifdef CONFIG_PERF_COUNTERS | ||
112 | # define INIT_PERF_COUNTERS(tsk) \ | ||
113 | .perf_counter_mutex = \ | ||
114 | __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ | ||
115 | .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), | ||
116 | #else | ||
117 | # define INIT_PERF_COUNTERS(tsk) | ||
118 | #endif | ||
119 | |||
111 | /* | 120 | /* |
112 | * INIT_TASK is used to set up the first task table, touch at | 121 | * INIT_TASK is used to set up the first task table, touch at |
113 | * your own risk!. Base=0, limit=0x1fffff (=2MB) | 122 | * your own risk!. Base=0, limit=0x1fffff (=2MB) |
@@ -171,6 +180,7 @@ extern struct cred init_cred; | |||
171 | }, \ | 180 | }, \ |
172 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ | 181 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ |
173 | INIT_IDS \ | 182 | INIT_IDS \ |
183 | INIT_PERF_COUNTERS(tsk) \ | ||
174 | INIT_TRACE_IRQFLAGS \ | 184 | INIT_TRACE_IRQFLAGS \ |
175 | INIT_LOCKDEP \ | 185 | INIT_LOCKDEP \ |
176 | INIT_FTRACE_GRAPH \ | 186 | INIT_FTRACE_GRAPH \ |
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0c8b89f28a95..a77c6007dc99 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h | |||
@@ -81,7 +81,12 @@ static inline unsigned int kstat_irqs(unsigned int irq) | |||
81 | return sum; | 81 | return sum; |
82 | } | 82 | } |
83 | 83 | ||
84 | |||
85 | /* | ||
86 | * Lock/unlock the current runqueue - to extract task statistics: | ||
87 | */ | ||
84 | extern unsigned long long task_delta_exec(struct task_struct *); | 88 | extern unsigned long long task_delta_exec(struct task_struct *); |
89 | |||
85 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); | 90 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); |
86 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); | 91 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); |
87 | extern void account_steal_time(cputime_t); | 92 | extern void account_steal_time(cputime_t); |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 000000000000..6e133954e2e4 --- /dev/null +++ b/include/linux/perf_counter.h | |||
@@ -0,0 +1,697 @@ | |||
1 | /* | ||
2 | * Performance counters: | ||
3 | * | ||
4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra | ||
7 | * | ||
8 | * Data type definitions, declarations, prototypes. | ||
9 | * | ||
10 | * Started by: Thomas Gleixner and Ingo Molnar | ||
11 | * | ||
12 | * For licencing details see kernel-base/COPYING | ||
13 | */ | ||
14 | #ifndef _LINUX_PERF_COUNTER_H | ||
15 | #define _LINUX_PERF_COUNTER_H | ||
16 | |||
17 | #include <linux/types.h> | ||
18 | #include <linux/ioctl.h> | ||
19 | #include <asm/byteorder.h> | ||
20 | |||
21 | /* | ||
22 | * User-space ABI bits: | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * attr.type | ||
27 | */ | ||
28 | enum perf_type_id { | ||
29 | PERF_TYPE_HARDWARE = 0, | ||
30 | PERF_TYPE_SOFTWARE = 1, | ||
31 | PERF_TYPE_TRACEPOINT = 2, | ||
32 | PERF_TYPE_HW_CACHE = 3, | ||
33 | PERF_TYPE_RAW = 4, | ||
34 | |||
35 | PERF_TYPE_MAX, /* non-ABI */ | ||
36 | }; | ||
37 | |||
38 | /* | ||
39 | * Generalized performance counter event types, used by the | ||
40 | * attr.config field passed to the sys_perf_counter_open() | ||
41 | * syscall: | ||
42 | */ | ||
43 | enum perf_hw_id { | ||
44 | /* | ||
45 | * Common hardware events, generalized by the kernel: | ||
46 | */ | ||
47 | PERF_COUNT_HW_CPU_CYCLES = 0, | ||
48 | PERF_COUNT_HW_INSTRUCTIONS = 1, | ||
49 | PERF_COUNT_HW_CACHE_REFERENCES = 2, | ||
50 | PERF_COUNT_HW_CACHE_MISSES = 3, | ||
51 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, | ||
52 | PERF_COUNT_HW_BRANCH_MISSES = 5, | ||
53 | PERF_COUNT_HW_BUS_CYCLES = 6, | ||
54 | |||
55 | PERF_COUNT_HW_MAX, /* non-ABI */ | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * Generalized hardware cache counters: | ||
60 | * | ||
61 | * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x | ||
62 | * { read, write, prefetch } x | ||
63 | * { accesses, misses } | ||
64 | */ | ||
65 | enum perf_hw_cache_id { | ||
66 | PERF_COUNT_HW_CACHE_L1D = 0, | ||
67 | PERF_COUNT_HW_CACHE_L1I = 1, | ||
68 | PERF_COUNT_HW_CACHE_LL = 2, | ||
69 | PERF_COUNT_HW_CACHE_DTLB = 3, | ||
70 | PERF_COUNT_HW_CACHE_ITLB = 4, | ||
71 | PERF_COUNT_HW_CACHE_BPU = 5, | ||
72 | |||
73 | PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ | ||
74 | }; | ||
75 | |||
76 | enum perf_hw_cache_op_id { | ||
77 | PERF_COUNT_HW_CACHE_OP_READ = 0, | ||
78 | PERF_COUNT_HW_CACHE_OP_WRITE = 1, | ||
79 | PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, | ||
80 | |||
81 | PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ | ||
82 | }; | ||
83 | |||
84 | enum perf_hw_cache_op_result_id { | ||
85 | PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, | ||
86 | PERF_COUNT_HW_CACHE_RESULT_MISS = 1, | ||
87 | |||
88 | PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ | ||
89 | }; | ||
90 | |||
91 | /* | ||
92 | * Special "software" counters provided by the kernel, even if the hardware | ||
93 | * does not support performance counters. These counters measure various | ||
94 | * physical and sw events of the kernel (and allow the profiling of them as | ||
95 | * well): | ||
96 | */ | ||
97 | enum perf_sw_ids { | ||
98 | PERF_COUNT_SW_CPU_CLOCK = 0, | ||
99 | PERF_COUNT_SW_TASK_CLOCK = 1, | ||
100 | PERF_COUNT_SW_PAGE_FAULTS = 2, | ||
101 | PERF_COUNT_SW_CONTEXT_SWITCHES = 3, | ||
102 | PERF_COUNT_SW_CPU_MIGRATIONS = 4, | ||
103 | PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, | ||
104 | PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, | ||
105 | |||
106 | PERF_COUNT_SW_MAX, /* non-ABI */ | ||
107 | }; | ||
108 | |||
109 | /* | ||
110 | * Bits that can be set in attr.sample_type to request information | ||
111 | * in the overflow packets. | ||
112 | */ | ||
113 | enum perf_counter_sample_format { | ||
114 | PERF_SAMPLE_IP = 1U << 0, | ||
115 | PERF_SAMPLE_TID = 1U << 1, | ||
116 | PERF_SAMPLE_TIME = 1U << 2, | ||
117 | PERF_SAMPLE_ADDR = 1U << 3, | ||
118 | PERF_SAMPLE_GROUP = 1U << 4, | ||
119 | PERF_SAMPLE_CALLCHAIN = 1U << 5, | ||
120 | PERF_SAMPLE_ID = 1U << 6, | ||
121 | PERF_SAMPLE_CPU = 1U << 7, | ||
122 | PERF_SAMPLE_PERIOD = 1U << 8, | ||
123 | }; | ||
124 | |||
125 | /* | ||
126 | * Bits that can be set in attr.read_format to request that | ||
127 | * reads on the counter should return the indicated quantities, | ||
128 | * in increasing order of bit value, after the counter value. | ||
129 | */ | ||
130 | enum perf_counter_read_format { | ||
131 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, | ||
132 | PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, | ||
133 | PERF_FORMAT_ID = 1U << 2, | ||
134 | }; | ||
135 | |||
136 | /* | ||
137 | * Hardware event to monitor via a performance monitoring counter: | ||
138 | */ | ||
139 | struct perf_counter_attr { | ||
140 | /* | ||
141 | * Major type: hardware/software/tracepoint/etc. | ||
142 | */ | ||
143 | __u32 type; | ||
144 | __u32 __reserved_1; | ||
145 | |||
146 | /* | ||
147 | * Type specific configuration information. | ||
148 | */ | ||
149 | __u64 config; | ||
150 | |||
151 | union { | ||
152 | __u64 sample_period; | ||
153 | __u64 sample_freq; | ||
154 | }; | ||
155 | |||
156 | __u64 sample_type; | ||
157 | __u64 read_format; | ||
158 | |||
159 | __u64 disabled : 1, /* off by default */ | ||
160 | inherit : 1, /* children inherit it */ | ||
161 | pinned : 1, /* must always be on PMU */ | ||
162 | exclusive : 1, /* only group on PMU */ | ||
163 | exclude_user : 1, /* don't count user */ | ||
164 | exclude_kernel : 1, /* ditto kernel */ | ||
165 | exclude_hv : 1, /* ditto hypervisor */ | ||
166 | exclude_idle : 1, /* don't count when idle */ | ||
167 | mmap : 1, /* include mmap data */ | ||
168 | comm : 1, /* include comm data */ | ||
169 | freq : 1, /* use freq, not period */ | ||
170 | |||
171 | __reserved_2 : 53; | ||
172 | |||
173 | __u32 wakeup_events; /* wakeup every n events */ | ||
174 | __u32 __reserved_3; | ||
175 | |||
176 | __u64 __reserved_4; | ||
177 | }; | ||
178 | |||
179 | /* | ||
180 | * Ioctls that can be done on a perf counter fd: | ||
181 | */ | ||
182 | #define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) | ||
183 | #define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) | ||
184 | #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) | ||
185 | #define PERF_COUNTER_IOC_RESET _IO ('$', 3) | ||
186 | #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) | ||
187 | |||
188 | enum perf_counter_ioc_flags { | ||
189 | PERF_IOC_FLAG_GROUP = 1U << 0, | ||
190 | }; | ||
191 | |||
192 | /* | ||
193 | * Structure of the page that can be mapped via mmap | ||
194 | */ | ||
195 | struct perf_counter_mmap_page { | ||
196 | __u32 version; /* version number of this structure */ | ||
197 | __u32 compat_version; /* lowest version this is compat with */ | ||
198 | |||
199 | /* | ||
200 | * Bits needed to read the hw counters in user-space. | ||
201 | * | ||
202 | * u32 seq; | ||
203 | * s64 count; | ||
204 | * | ||
205 | * do { | ||
206 | * seq = pc->lock; | ||
207 | * | ||
208 | * barrier(); | ||
209 | * if (pc->index) { | ||
210 | * count = pmc_read(pc->index - 1); | ||
211 | * count += pc->offset; | ||
212 | * } else | ||
213 | * goto regular_read; | ||
214 | * | ||
215 | * barrier(); | ||
216 | * } while (pc->lock != seq); | ||
217 | * | ||
218 | * NOTE: for obvious reasons this only works on self-monitoring | ||
219 | * processes. | ||
220 | */ | ||
221 | __u32 lock; /* seqlock for synchronization */ | ||
222 | __u32 index; /* hardware counter identifier */ | ||
223 | __s64 offset; /* add to hardware counter value */ | ||
224 | |||
225 | /* | ||
226 | * Control data for the mmap() data buffer. | ||
227 | * | ||
228 | * User-space reading this value should issue an rmb(), on SMP capable | ||
229 | * platforms, after reading this value -- see perf_counter_wakeup(). | ||
230 | */ | ||
231 | __u64 data_head; /* head in the data section */ | ||
232 | }; | ||
233 | |||
234 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) | ||
235 | #define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) | ||
236 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
237 | #define PERF_EVENT_MISC_USER (2 << 0) | ||
238 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) | ||
239 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
240 | |||
241 | struct perf_event_header { | ||
242 | __u32 type; | ||
243 | __u16 misc; | ||
244 | __u16 size; | ||
245 | }; | ||
246 | |||
247 | enum perf_event_type { | ||
248 | |||
249 | /* | ||
250 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
251 | * correlate userspace IPs to code. They have the following structure: | ||
252 | * | ||
253 | * struct { | ||
254 | * struct perf_event_header header; | ||
255 | * | ||
256 | * u32 pid, tid; | ||
257 | * u64 addr; | ||
258 | * u64 len; | ||
259 | * u64 pgoff; | ||
260 | * char filename[]; | ||
261 | * }; | ||
262 | */ | ||
263 | PERF_EVENT_MMAP = 1, | ||
264 | |||
265 | /* | ||
266 | * struct { | ||
267 | * struct perf_event_header header; | ||
268 | * | ||
269 | * u32 pid, tid; | ||
270 | * char comm[]; | ||
271 | * }; | ||
272 | */ | ||
273 | PERF_EVENT_COMM = 3, | ||
274 | |||
275 | /* | ||
276 | * struct { | ||
277 | * struct perf_event_header header; | ||
278 | * u64 time; | ||
279 | * u64 id; | ||
280 | * u64 sample_period; | ||
281 | * }; | ||
282 | */ | ||
283 | PERF_EVENT_PERIOD = 4, | ||
284 | |||
285 | /* | ||
286 | * struct { | ||
287 | * struct perf_event_header header; | ||
288 | * u64 time; | ||
289 | * u64 id; | ||
290 | * }; | ||
291 | */ | ||
292 | PERF_EVENT_THROTTLE = 5, | ||
293 | PERF_EVENT_UNTHROTTLE = 6, | ||
294 | |||
295 | /* | ||
296 | * struct { | ||
297 | * struct perf_event_header header; | ||
298 | * u32 pid, ppid; | ||
299 | * }; | ||
300 | */ | ||
301 | PERF_EVENT_FORK = 7, | ||
302 | |||
303 | /* | ||
304 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | ||
305 | * will be PERF_RECORD_* | ||
306 | * | ||
307 | * struct { | ||
308 | * struct perf_event_header header; | ||
309 | * | ||
310 | * { u64 ip; } && PERF_RECORD_IP | ||
311 | * { u32 pid, tid; } && PERF_RECORD_TID | ||
312 | * { u64 time; } && PERF_RECORD_TIME | ||
313 | * { u64 addr; } && PERF_RECORD_ADDR | ||
314 | * { u64 config; } && PERF_RECORD_CONFIG | ||
315 | * { u32 cpu, res; } && PERF_RECORD_CPU | ||
316 | * | ||
317 | * { u64 nr; | ||
318 | * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP | ||
319 | * | ||
320 | * { u16 nr, | ||
321 | * hv, | ||
322 | * kernel, | ||
323 | * user; | ||
324 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
325 | * }; | ||
326 | */ | ||
327 | }; | ||
328 | |||
329 | #ifdef __KERNEL__ | ||
330 | /* | ||
331 | * Kernel-internal data types and definitions: | ||
332 | */ | ||
333 | |||
334 | #ifdef CONFIG_PERF_COUNTERS | ||
335 | # include <asm/perf_counter.h> | ||
336 | #endif | ||
337 | |||
338 | #include <linux/list.h> | ||
339 | #include <linux/mutex.h> | ||
340 | #include <linux/rculist.h> | ||
341 | #include <linux/rcupdate.h> | ||
342 | #include <linux/spinlock.h> | ||
343 | #include <linux/hrtimer.h> | ||
344 | #include <linux/fs.h> | ||
345 | #include <linux/pid_namespace.h> | ||
346 | #include <asm/atomic.h> | ||
347 | |||
348 | struct task_struct; | ||
349 | |||
350 | /** | ||
351 | * struct hw_perf_counter - performance counter hardware details: | ||
352 | */ | ||
353 | struct hw_perf_counter { | ||
354 | #ifdef CONFIG_PERF_COUNTERS | ||
355 | union { | ||
356 | struct { /* hardware */ | ||
357 | u64 config; | ||
358 | unsigned long config_base; | ||
359 | unsigned long counter_base; | ||
360 | int idx; | ||
361 | }; | ||
362 | union { /* software */ | ||
363 | atomic64_t count; | ||
364 | struct hrtimer hrtimer; | ||
365 | }; | ||
366 | }; | ||
367 | atomic64_t prev_count; | ||
368 | u64 sample_period; | ||
369 | u64 last_period; | ||
370 | atomic64_t period_left; | ||
371 | u64 interrupts; | ||
372 | |||
373 | u64 freq_count; | ||
374 | u64 freq_interrupts; | ||
375 | u64 freq_stamp; | ||
376 | #endif | ||
377 | }; | ||
378 | |||
379 | struct perf_counter; | ||
380 | |||
381 | /** | ||
382 | * struct pmu - generic performance monitoring unit | ||
383 | */ | ||
384 | struct pmu { | ||
385 | int (*enable) (struct perf_counter *counter); | ||
386 | void (*disable) (struct perf_counter *counter); | ||
387 | void (*read) (struct perf_counter *counter); | ||
388 | void (*unthrottle) (struct perf_counter *counter); | ||
389 | }; | ||
390 | |||
391 | /** | ||
392 | * enum perf_counter_active_state - the states of a counter | ||
393 | */ | ||
394 | enum perf_counter_active_state { | ||
395 | PERF_COUNTER_STATE_ERROR = -2, | ||
396 | PERF_COUNTER_STATE_OFF = -1, | ||
397 | PERF_COUNTER_STATE_INACTIVE = 0, | ||
398 | PERF_COUNTER_STATE_ACTIVE = 1, | ||
399 | }; | ||
400 | |||
401 | struct file; | ||
402 | |||
403 | struct perf_mmap_data { | ||
404 | struct rcu_head rcu_head; | ||
405 | int nr_pages; /* nr of data pages */ | ||
406 | int nr_locked; /* nr pages mlocked */ | ||
407 | |||
408 | atomic_t poll; /* POLL_ for wakeups */ | ||
409 | atomic_t events; /* event limit */ | ||
410 | |||
411 | atomic_long_t head; /* write position */ | ||
412 | atomic_long_t done_head; /* completed head */ | ||
413 | |||
414 | atomic_t lock; /* concurrent writes */ | ||
415 | |||
416 | atomic_t wakeup; /* needs a wakeup */ | ||
417 | |||
418 | struct perf_counter_mmap_page *user_page; | ||
419 | void *data_pages[0]; | ||
420 | }; | ||
421 | |||
422 | struct perf_pending_entry { | ||
423 | struct perf_pending_entry *next; | ||
424 | void (*func)(struct perf_pending_entry *); | ||
425 | }; | ||
426 | |||
427 | /** | ||
428 | * struct perf_counter - performance counter kernel representation: | ||
429 | */ | ||
430 | struct perf_counter { | ||
431 | #ifdef CONFIG_PERF_COUNTERS | ||
432 | struct list_head list_entry; | ||
433 | struct list_head event_entry; | ||
434 | struct list_head sibling_list; | ||
435 | int nr_siblings; | ||
436 | struct perf_counter *group_leader; | ||
437 | const struct pmu *pmu; | ||
438 | |||
439 | enum perf_counter_active_state state; | ||
440 | atomic64_t count; | ||
441 | |||
442 | /* | ||
443 | * These are the total time in nanoseconds that the counter | ||
444 | * has been enabled (i.e. eligible to run, and the task has | ||
445 | * been scheduled in, if this is a per-task counter) | ||
446 | * and running (scheduled onto the CPU), respectively. | ||
447 | * | ||
448 | * They are computed from tstamp_enabled, tstamp_running and | ||
449 | * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. | ||
450 | */ | ||
451 | u64 total_time_enabled; | ||
452 | u64 total_time_running; | ||
453 | |||
454 | /* | ||
455 | * These are timestamps used for computing total_time_enabled | ||
456 | * and total_time_running when the counter is in INACTIVE or | ||
457 | * ACTIVE state, measured in nanoseconds from an arbitrary point | ||
458 | * in time. | ||
459 | * tstamp_enabled: the notional time when the counter was enabled | ||
460 | * tstamp_running: the notional time when the counter was scheduled on | ||
461 | * tstamp_stopped: in INACTIVE state, the notional time when the | ||
462 | * counter was scheduled off. | ||
463 | */ | ||
464 | u64 tstamp_enabled; | ||
465 | u64 tstamp_running; | ||
466 | u64 tstamp_stopped; | ||
467 | |||
468 | struct perf_counter_attr attr; | ||
469 | struct hw_perf_counter hw; | ||
470 | |||
471 | struct perf_counter_context *ctx; | ||
472 | struct file *filp; | ||
473 | |||
474 | /* | ||
475 | * These accumulate total time (in nanoseconds) that children | ||
476 | * counters have been enabled and running, respectively. | ||
477 | */ | ||
478 | atomic64_t child_total_time_enabled; | ||
479 | atomic64_t child_total_time_running; | ||
480 | |||
481 | /* | ||
482 | * Protect attach/detach and child_list: | ||
483 | */ | ||
484 | struct mutex child_mutex; | ||
485 | struct list_head child_list; | ||
486 | struct perf_counter *parent; | ||
487 | |||
488 | int oncpu; | ||
489 | int cpu; | ||
490 | |||
491 | struct list_head owner_entry; | ||
492 | struct task_struct *owner; | ||
493 | |||
494 | /* mmap bits */ | ||
495 | struct mutex mmap_mutex; | ||
496 | atomic_t mmap_count; | ||
497 | struct perf_mmap_data *data; | ||
498 | |||
499 | /* poll related */ | ||
500 | wait_queue_head_t waitq; | ||
501 | struct fasync_struct *fasync; | ||
502 | |||
503 | /* delayed work for NMIs and such */ | ||
504 | int pending_wakeup; | ||
505 | int pending_kill; | ||
506 | int pending_disable; | ||
507 | struct perf_pending_entry pending; | ||
508 | |||
509 | atomic_t event_limit; | ||
510 | |||
511 | void (*destroy)(struct perf_counter *); | ||
512 | struct rcu_head rcu_head; | ||
513 | |||
514 | struct pid_namespace *ns; | ||
515 | u64 id; | ||
516 | #endif | ||
517 | }; | ||
518 | |||
519 | /** | ||
520 | * struct perf_counter_context - counter context structure | ||
521 | * | ||
522 | * Used as a container for task counters and CPU counters as well: | ||
523 | */ | ||
524 | struct perf_counter_context { | ||
525 | /* | ||
526 | * Protect the states of the counters in the list, | ||
527 | * nr_active, and the list: | ||
528 | */ | ||
529 | spinlock_t lock; | ||
530 | /* | ||
531 | * Protect the list of counters. Locking either mutex or lock | ||
532 | * is sufficient to ensure the list doesn't change; to change | ||
533 | * the list you need to lock both the mutex and the spinlock. | ||
534 | */ | ||
535 | struct mutex mutex; | ||
536 | |||
537 | struct list_head counter_list; | ||
538 | struct list_head event_list; | ||
539 | int nr_counters; | ||
540 | int nr_active; | ||
541 | int is_active; | ||
542 | atomic_t refcount; | ||
543 | struct task_struct *task; | ||
544 | |||
545 | /* | ||
546 | * Context clock, runs when context enabled. | ||
547 | */ | ||
548 | u64 time; | ||
549 | u64 timestamp; | ||
550 | |||
551 | /* | ||
552 | * These fields let us detect when two contexts have both | ||
553 | * been cloned (inherited) from a common ancestor. | ||
554 | */ | ||
555 | struct perf_counter_context *parent_ctx; | ||
556 | u64 parent_gen; | ||
557 | u64 generation; | ||
558 | int pin_count; | ||
559 | struct rcu_head rcu_head; | ||
560 | }; | ||
561 | |||
562 | /** | ||
563 | * struct perf_cpu_context - per cpu counter context structure | ||
564 | */ | ||
565 | struct perf_cpu_context { | ||
566 | struct perf_counter_context ctx; | ||
567 | struct perf_counter_context *task_ctx; | ||
568 | int active_oncpu; | ||
569 | int max_pertask; | ||
570 | int exclusive; | ||
571 | |||
572 | /* | ||
573 | * Recursion avoidance: | ||
574 | * | ||
575 | * task, softirq, irq, nmi context | ||
576 | */ | ||
577 | int recursion[4]; | ||
578 | }; | ||
579 | |||
580 | #ifdef CONFIG_PERF_COUNTERS | ||
581 | |||
582 | /* | ||
583 | * Set by architecture code: | ||
584 | */ | ||
585 | extern int perf_max_counters; | ||
586 | |||
587 | extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); | ||
588 | |||
589 | extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); | ||
590 | extern void perf_counter_task_sched_out(struct task_struct *task, | ||
591 | struct task_struct *next, int cpu); | ||
592 | extern void perf_counter_task_tick(struct task_struct *task, int cpu); | ||
593 | extern int perf_counter_init_task(struct task_struct *child); | ||
594 | extern void perf_counter_exit_task(struct task_struct *child); | ||
595 | extern void perf_counter_free_task(struct task_struct *task); | ||
596 | extern void perf_counter_do_pending(void); | ||
597 | extern void perf_counter_print_debug(void); | ||
598 | extern void __perf_disable(void); | ||
599 | extern bool __perf_enable(void); | ||
600 | extern void perf_disable(void); | ||
601 | extern void perf_enable(void); | ||
602 | extern int perf_counter_task_disable(void); | ||
603 | extern int perf_counter_task_enable(void); | ||
604 | extern int hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
605 | struct perf_cpu_context *cpuctx, | ||
606 | struct perf_counter_context *ctx, int cpu); | ||
607 | extern void perf_counter_update_userpage(struct perf_counter *counter); | ||
608 | |||
609 | struct perf_sample_data { | ||
610 | struct pt_regs *regs; | ||
611 | u64 addr; | ||
612 | u64 period; | ||
613 | }; | ||
614 | |||
615 | extern int perf_counter_overflow(struct perf_counter *counter, int nmi, | ||
616 | struct perf_sample_data *data); | ||
617 | |||
618 | /* | ||
619 | * Return 1 for a software counter, 0 for a hardware counter | ||
620 | */ | ||
621 | static inline int is_software_counter(struct perf_counter *counter) | ||
622 | { | ||
623 | return (counter->attr.type != PERF_TYPE_RAW) && | ||
624 | (counter->attr.type != PERF_TYPE_HARDWARE); | ||
625 | } | ||
626 | |||
627 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); | ||
628 | |||
629 | extern void __perf_counter_mmap(struct vm_area_struct *vma); | ||
630 | |||
631 | static inline void perf_counter_mmap(struct vm_area_struct *vma) | ||
632 | { | ||
633 | if (vma->vm_flags & VM_EXEC) | ||
634 | __perf_counter_mmap(vma); | ||
635 | } | ||
636 | |||
637 | extern void perf_counter_comm(struct task_struct *tsk); | ||
638 | extern void perf_counter_fork(struct task_struct *tsk); | ||
639 | |||
640 | extern void perf_counter_task_migration(struct task_struct *task, int cpu); | ||
641 | |||
642 | #define MAX_STACK_DEPTH 255 | ||
643 | |||
644 | struct perf_callchain_entry { | ||
645 | u16 nr; | ||
646 | u16 hv; | ||
647 | u16 kernel; | ||
648 | u16 user; | ||
649 | u64 ip[MAX_STACK_DEPTH]; | ||
650 | }; | ||
651 | |||
652 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | ||
653 | |||
654 | extern int sysctl_perf_counter_paranoid; | ||
655 | extern int sysctl_perf_counter_mlock; | ||
656 | extern int sysctl_perf_counter_sample_rate; | ||
657 | |||
658 | extern void perf_counter_init(void); | ||
659 | |||
660 | #ifndef perf_misc_flags | ||
661 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ | ||
662 | PERF_EVENT_MISC_KERNEL) | ||
663 | #define perf_instruction_pointer(regs) instruction_pointer(regs) | ||
664 | #endif | ||
665 | |||
666 | #else | ||
667 | static inline void | ||
668 | perf_counter_task_sched_in(struct task_struct *task, int cpu) { } | ||
669 | static inline void | ||
670 | perf_counter_task_sched_out(struct task_struct *task, | ||
671 | struct task_struct *next, int cpu) { } | ||
672 | static inline void | ||
673 | perf_counter_task_tick(struct task_struct *task, int cpu) { } | ||
674 | static inline int perf_counter_init_task(struct task_struct *child) { return 0; } | ||
675 | static inline void perf_counter_exit_task(struct task_struct *child) { } | ||
676 | static inline void perf_counter_free_task(struct task_struct *task) { } | ||
677 | static inline void perf_counter_do_pending(void) { } | ||
678 | static inline void perf_counter_print_debug(void) { } | ||
679 | static inline void perf_disable(void) { } | ||
680 | static inline void perf_enable(void) { } | ||
681 | static inline int perf_counter_task_disable(void) { return -EINVAL; } | ||
682 | static inline int perf_counter_task_enable(void) { return -EINVAL; } | ||
683 | |||
684 | static inline void | ||
685 | perf_swcounter_event(u32 event, u64 nr, int nmi, | ||
686 | struct pt_regs *regs, u64 addr) { } | ||
687 | |||
688 | static inline void perf_counter_mmap(struct vm_area_struct *vma) { } | ||
689 | static inline void perf_counter_comm(struct task_struct *tsk) { } | ||
690 | static inline void perf_counter_fork(struct task_struct *tsk) { } | ||
691 | static inline void perf_counter_init(void) { } | ||
692 | static inline void perf_counter_task_migration(struct task_struct *task, | ||
693 | int cpu) { } | ||
694 | #endif | ||
695 | |||
696 | #endif /* __KERNEL__ */ | ||
697 | #endif /* _LINUX_PERF_COUNTER_H */ | ||
diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e7..b00df4c79c63 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h | |||
@@ -85,4 +85,7 @@ | |||
85 | #define PR_SET_TIMERSLACK 29 | 85 | #define PR_SET_TIMERSLACK 29 |
86 | #define PR_GET_TIMERSLACK 30 | 86 | #define PR_GET_TIMERSLACK 30 |
87 | 87 | ||
88 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
89 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
90 | |||
88 | #endif /* _LINUX_PRCTL_H */ | 91 | #endif /* _LINUX_PRCTL_H */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 42bf2766111e..4896fdfec913 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -99,6 +99,7 @@ struct robust_list_head; | |||
99 | struct bio; | 99 | struct bio; |
100 | struct fs_struct; | 100 | struct fs_struct; |
101 | struct bts_context; | 101 | struct bts_context; |
102 | struct perf_counter_context; | ||
102 | 103 | ||
103 | /* | 104 | /* |
104 | * List of flags we want to share for kernel threads, | 105 | * List of flags we want to share for kernel threads, |
@@ -139,6 +140,7 @@ extern unsigned long nr_running(void); | |||
139 | extern unsigned long nr_uninterruptible(void); | 140 | extern unsigned long nr_uninterruptible(void); |
140 | extern unsigned long nr_iowait(void); | 141 | extern unsigned long nr_iowait(void); |
141 | extern void calc_global_load(void); | 142 | extern void calc_global_load(void); |
143 | extern u64 cpu_nr_migrations(int cpu); | ||
142 | 144 | ||
143 | extern unsigned long get_parent_ip(unsigned long addr); | 145 | extern unsigned long get_parent_ip(unsigned long addr); |
144 | 146 | ||
@@ -674,6 +676,10 @@ struct user_struct { | |||
674 | struct work_struct work; | 676 | struct work_struct work; |
675 | #endif | 677 | #endif |
676 | #endif | 678 | #endif |
679 | |||
680 | #ifdef CONFIG_PERF_COUNTERS | ||
681 | atomic_long_t locked_vm; | ||
682 | #endif | ||
677 | }; | 683 | }; |
678 | 684 | ||
679 | extern int uids_sysfs_init(void); | 685 | extern int uids_sysfs_init(void); |
@@ -1073,9 +1079,10 @@ struct sched_entity { | |||
1073 | u64 last_wakeup; | 1079 | u64 last_wakeup; |
1074 | u64 avg_overlap; | 1080 | u64 avg_overlap; |
1075 | 1081 | ||
1082 | u64 nr_migrations; | ||
1083 | |||
1076 | u64 start_runtime; | 1084 | u64 start_runtime; |
1077 | u64 avg_wakeup; | 1085 | u64 avg_wakeup; |
1078 | u64 nr_migrations; | ||
1079 | 1086 | ||
1080 | #ifdef CONFIG_SCHEDSTATS | 1087 | #ifdef CONFIG_SCHEDSTATS |
1081 | u64 wait_start; | 1088 | u64 wait_start; |
@@ -1396,6 +1403,11 @@ struct task_struct { | |||
1396 | struct list_head pi_state_list; | 1403 | struct list_head pi_state_list; |
1397 | struct futex_pi_state *pi_state_cache; | 1404 | struct futex_pi_state *pi_state_cache; |
1398 | #endif | 1405 | #endif |
1406 | #ifdef CONFIG_PERF_COUNTERS | ||
1407 | struct perf_counter_context *perf_counter_ctxp; | ||
1408 | struct mutex perf_counter_mutex; | ||
1409 | struct list_head perf_counter_list; | ||
1410 | #endif | ||
1399 | #ifdef CONFIG_NUMA | 1411 | #ifdef CONFIG_NUMA |
1400 | struct mempolicy *mempolicy; | 1412 | struct mempolicy *mempolicy; |
1401 | short il_next; | 1413 | short il_next; |
@@ -2410,6 +2422,13 @@ static inline void inc_syscw(struct task_struct *tsk) | |||
2410 | #define TASK_SIZE_OF(tsk) TASK_SIZE | 2422 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2411 | #endif | 2423 | #endif |
2412 | 2424 | ||
2425 | /* | ||
2426 | * Call the function if the target task is executing on a CPU right now: | ||
2427 | */ | ||
2428 | extern void task_oncpu_function_call(struct task_struct *p, | ||
2429 | void (*func) (void *info), void *info); | ||
2430 | |||
2431 | |||
2413 | #ifdef CONFIG_MM_OWNER | 2432 | #ifdef CONFIG_MM_OWNER |
2414 | extern void mm_update_next_owner(struct mm_struct *mm); | 2433 | extern void mm_update_next_owner(struct mm_struct *mm); |
2415 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); | 2434 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 30520844b8da..c6c84ad8bd71 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -55,6 +55,7 @@ struct compat_timeval; | |||
55 | struct robust_list_head; | 55 | struct robust_list_head; |
56 | struct getcpu_cache; | 56 | struct getcpu_cache; |
57 | struct old_linux_dirent; | 57 | struct old_linux_dirent; |
58 | struct perf_counter_attr; | ||
58 | 59 | ||
59 | #include <linux/types.h> | 60 | #include <linux/types.h> |
60 | #include <linux/aio_abi.h> | 61 | #include <linux/aio_abi.h> |
@@ -755,4 +756,8 @@ asmlinkage long sys_pipe(int __user *); | |||
755 | 756 | ||
756 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | 757 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); |
757 | 758 | ||
759 | |||
760 | asmlinkage long sys_perf_counter_open( | ||
761 | const struct perf_counter_attr __user *attr_uptr, | ||
762 | pid_t pid, int cpu, int group_fd, unsigned long flags); | ||
758 | #endif | 763 | #endif |