aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-12-11 14:40:18 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-12-11 14:41:00 -0500
commit 447557ac7ce120306b4a31d6003faef39cb1bf14 (patch)
tree 6b2e223ca08c86f7eed1a1ac141a9fa0a49f4cc0
parent 6a930700c8b655a9e25e42fc4adc0b225ebbcefc (diff)
perf counters: update docs
Impact: update docs
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- Documentation/perf-counters.txt 107
1 files changed, 75 insertions, 32 deletions
diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt
index 19033a0bb526..fddd32189a50 100644
--- a/Documentation/perf-counters.txt
+++ b/Documentation/perf-counters.txt
@@ -10,8 +10,8 @@ trigger interrupts when a threshold number of events have passed - and can
10thus be used to profile the code that runs on that CPU. 10thus be used to profile the code that runs on that CPU.
11 11
12The Linux Performance Counter subsystem provides an abstraction of these 12The Linux Performance Counter subsystem provides an abstraction of these
13hardware capabilities. It provides per task and per CPU counters, and 13hardware capabilities. It provides per task and per CPU counters, counter
14it provides event capabilities on top of those. 14groups, and it provides event capabilities on top of those.
15 15
16Performance counters are accessed via special file descriptors. 16Performance counters are accessed via special file descriptors.
17There's one file descriptor per virtual counter used. 17There's one file descriptor per virtual counter used.
@@ -19,12 +19,8 @@ There's one file descriptor per virtual counter used.
19The special file descriptor is opened via the perf_counter_open() 19The special file descriptor is opened via the perf_counter_open()
20system call: 20system call:
21 21
22 int 22 int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr,
23 perf_counter_open(u32 hw_event_type, 23 pid_t pid, int cpu, int group_fd);
24 u32 hw_event_period,
25 u32 record_type,
26 pid_t pid,
27 int cpu);
28 24
29The syscall returns the new fd. The fd can be used via the normal 25The syscall returns the new fd. The fd can be used via the normal
30VFS system calls: read() can be used to read the counter, fcntl() 26VFS system calls: read() can be used to read the counter, fcntl()
@@ -33,39 +29,78 @@ can be used to set the blocking mode, etc.
33Multiple counters can be kept open at a time, and the counters 29Multiple counters can be kept open at a time, and the counters
34can be poll()ed. 30can be poll()ed.
35 31
36When creating a new counter fd, 'hw_event_type' is one of: 32When creating a new counter fd, 'perf_counter_hw_event' is:
37 33
38 enum hw_event_types { 34/*
39 PERF_COUNT_CYCLES, 35 * Hardware event to monitor via a performance monitoring counter:
40 PERF_COUNT_INSTRUCTIONS, 36 */
41 PERF_COUNT_CACHE_REFERENCES, 37struct perf_counter_hw_event {
42 PERF_COUNT_CACHE_MISSES, 38 s64 type;
43 PERF_COUNT_BRANCH_INSTRUCTIONS, 39
44 PERF_COUNT_BRANCH_MISSES, 40 u64 irq_period;
45 }; 41 u32 record_type;
42
43 u32 disabled : 1, /* off by default */
44 nmi : 1, /* NMI sampling */
45 raw : 1, /* raw event type */
46 __reserved_1 : 29;
47
48 u64 __reserved_2;
49};
50
51/*
52 * Generalized performance counter event types, used by the hw_event.type
53 * parameter of the sys_perf_counter_open() syscall:
54 */
55enum hw_event_types {
56 /*
57 * Common hardware events, generalized by the kernel:
58 */
59 PERF_COUNT_CYCLES = 0,
60 PERF_COUNT_INSTRUCTIONS = 1,
61 PERF_COUNT_CACHE_REFERENCES = 2,
62 PERF_COUNT_CACHE_MISSES = 3,
63 PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
64 PERF_COUNT_BRANCH_MISSES = 5,
65
66 /*
67 * Special "software" counters provided by the kernel, even if
68 * the hardware does not support performance counters. These
69 * counters measure various physical and sw events of the
70 * kernel (and allow the profiling of them as well):
71 */
72 PERF_COUNT_CPU_CLOCK = -1,
73 PERF_COUNT_TASK_CLOCK = -2,
74 /*
75 * Future software events:
76 */
77 /* PERF_COUNT_PAGE_FAULTS = -3,
78 PERF_COUNT_CONTEXT_SWITCHES = -4, */
79};
46 80
47These are standardized types of events that work uniformly on all CPUs 81These are standardized types of events that work uniformly on all CPUs
48that implement Performance Counters support under Linux. If a CPU is 82that implement Performance Counters support under Linux. If a CPU is
49not able to count branch-misses, then the system call will return 83not able to count branch-misses, then the system call will return
50-EINVAL. 84-EINVAL.
51 85
52[ Note: more hw_event_types are supported as well, but they are CPU 86More hw_event_types are supported as well, but they are CPU
53 specific and are enumerated via /sys on a per CPU basis. Raw hw event 87specific and are enumerated via /sys on a per CPU basis. Raw hw event
54 types can be passed in as negative numbers. For example, to count 88types can be passed in under hw_event.type if hw_event.raw is 1.
55 "External bus cycles while bus lock signal asserted" events on Intel 89For example, to count "External bus cycles while bus lock signal asserted"
56 Core CPUs, pass in a -0x4064 event type value. ] 90events on Intel Core CPUs, pass in a 0x4064 event type value and set
57 91hw_event.raw to 1.
58The parameter 'hw_event_period' is the number of events before waking up
59a read() that is blocked on a counter fd. Zero value means a non-blocking
60counter.
61 92
62'record_type' is the type of data that a read() will provide for the 93'record_type' is the type of data that a read() will provide for the
63counter, and it can be one of: 94counter, and it can be one of:
64 95
65 enum perf_record_type { 96/*
66 PERF_RECORD_SIMPLE, 97 * IRQ-notification data record type:
67 PERF_RECORD_IRQ, 98 */
68 }; 99enum perf_counter_record_type {
100 PERF_RECORD_SIMPLE = 0,
101 PERF_RECORD_IRQ = 1,
102 PERF_RECORD_GROUP = 2,
103};
69 104
70A "simple" counter is one that counts hardware events and allows 105A "simple" counter is one that counts hardware events and allows
71them to be read out into a u64 count value. (read() returns 8 on 106them to be read out into a u64 count value. (read() returns 8 on
@@ -76,6 +111,10 @@ the IP of the interrupted context. In this case read() will return
76the 8-byte counter value, plus the Instruction Pointer address of the 111the 8-byte counter value, plus the Instruction Pointer address of the
77interrupted context. 112interrupted context.
78 113
114The 'hw_event.irq_period' field is the number of events before waking up
115a read() that is blocked on a counter fd. Zero value means a non-blocking
116counter.
117
79The 'pid' parameter allows the counter to be specific to a task: 118The 'pid' parameter allows the counter to be specific to a task:
80 119
81 pid == 0: if the pid parameter is zero, the counter is attached to the 120 pid == 0: if the pid parameter is zero, the counter is attached to the
@@ -92,7 +131,7 @@ CPU:
92 cpu >= 0: the counter is restricted to a specific CPU 131 cpu >= 0: the counter is restricted to a specific CPU
93 cpu == -1: the counter counts on all CPUs 132 cpu == -1: the counter counts on all CPUs
94 133
95Note: the combination of 'pid == -1' and 'cpu == -1' is not valid. 134(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
96 135
97A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts 136A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
98events of that task and 'follows' that task to whatever CPU the task 137events of that task and 'follows' that task to whatever CPU the task
@@ -102,3 +141,7 @@ their own tasks.
102A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts 141A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
103all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. 142all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
104 143
144Group counters are created by passing in a group_fd of another counter.
145Groups are scheduled at once and can be used with PERF_RECORD_GROUP
146to record multi-dimensional timestamps.
147