Diffstat (limited to 'drivers/oprofile/cpu_buffer.c')
-rw-r--r--    drivers/oprofile/cpu_buffer.c    307
1 file changed, 307 insertions, 0 deletions
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
new file mode 100644
index 000000000000..e9b1772a3a28
--- /dev/null
+++ b/drivers/oprofile/cpu_buffer.c
@@ -0,0 +1,307 @@
/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

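/*
 * One sample buffer per CPU, indexed by smp_processor_id().  The
 * sampling interrupt fills it; wq_sync_buffer() below periodically
 * drains it into the global event buffer.
 */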
struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;

static void wq_sync_buffer(void *);

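/* How often each CPU buffer is flushed into the event buffer: every
 * HZ/10 jiffies, i.e. ten times a second. */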
#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

void free_cpu_buffers(void)
{
        int i;

        for_each_online_cpu(i) {
                vfree(cpu_buffer[i].buffer);
        }
}


int alloc_cpu_buffers(void)
{
        int i;

        unsigned long buffer_size = fs_cpu_buffer_size;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];

                b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size);
                if (!b->buffer)
                        goto fail;

                b->last_task = NULL;
                b->last_is_kernel = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
                b->tail_pos = 0;
                b->head_pos = 0;
                b->sample_received = 0;
                b->sample_lost_overflow = 0;
                b->cpu = i;
                INIT_WORK(&b->work, wq_sync_buffer, b);
        }
        return 0;

fail:
        free_cpu_buffers();
        return -ENOMEM;
}


void start_cpu_work(void)
{
        int i;

        work_enabled = 1;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];

                /*
                 * Spread the work by 1 jiffy per cpu so they don't all
                 * fire at once.
                 */
                schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
        }
}


void end_cpu_work(void)
{
        int i;

        work_enabled = 0;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];

                cancel_delayed_work(&b->work);
        }

        flush_scheduled_work();
}


/* Resets the cpu buffer to a sane state. */
void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf)
{
        /* Reset these to invalid values; the next sample
         * collected will re-initialise them properly.
         */
        cpu_buf->last_is_kernel = -1;
        cpu_buf->last_task = NULL;
}


/* compute number of available slots in cpu_buffer queue */
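/* One slot is always left unused so that a full ring (head just behind
 * tail) can be told apart from an empty one (head == tail). */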
static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b)
{
        unsigned long head = b->head_pos;
        unsigned long tail = b->tail_pos;

        if (tail > head)
                return (tail - head) - 1;

        return tail + (b->buffer_size - head) - 1;
}


static void increment_head(struct oprofile_cpu_buffer * b)
{
        unsigned long new_head = b->head_pos + 1;

        /* Ensure anything written to the slot before we
         * increment is visible */
        wmb();

        if (new_head < b->buffer_size)
                b->head_pos = new_head;
        else
                b->head_pos = 0;
}


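/* Store one pc/event pair at the current head and advance the head.
 * Callers must check nr_available_slots() first; there is no overflow
 * check here. */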
inline static void
add_sample(struct oprofile_cpu_buffer * cpu_buf,
        unsigned long pc, unsigned long event)
{
        struct op_sample * entry = &cpu_buf->buffer[cpu_buf->head_pos];
        entry->eip = pc;
        entry->event = event;
        increment_head(cpu_buf);
}


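/* An ESCAPE_CODE in the pc slot marks the event slot as a control value
 * (kernel enter/exit, task pointer, CPU_TRACE_BEGIN) rather than an
 * ordinary sample. */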
inline static void
add_code(struct oprofile_cpu_buffer * buffer, unsigned long value)
{
        add_sample(buffer, ESCAPE_CODE, value);
}


/* This must be safe from any context. It's safe writing here
 * because of the head/tail separation of the writer and reader
 * of the CPU buffer.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes
 */
static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
                int is_kernel, unsigned long event)
{
        struct task_struct * task;

        cpu_buf->sample_received++;

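        /* Worst case we need three slots: an escape for a kernel/user
         * transition, an escape for a task switch, and the sample itself. */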
        if (nr_available_slots(cpu_buf) < 3) {
                cpu_buf->sample_lost_overflow++;
                return 0;
        }

        is_kernel = !!is_kernel;

        task = current;

        /* notice a switch from user->kernel or vice versa */
        if (cpu_buf->last_is_kernel != is_kernel) {
                cpu_buf->last_is_kernel = is_kernel;
                add_code(cpu_buf, is_kernel);
        }

        /* notice a task switch */
        if (cpu_buf->last_task != task) {
                cpu_buf->last_task = task;
                add_code(cpu_buf, (unsigned long)task);
        }

        add_sample(cpu_buf, pc, event);
        return 1;
}

static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
{
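        /* Room for the CPU_TRACE_BEGIN escape plus the up to three slots
         * that log_sample() needs for the sample that follows. */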
        if (nr_available_slots(cpu_buf) < 4) {
                cpu_buf->sample_lost_overflow++;
                return 0;
        }

        add_code(cpu_buf, CPU_TRACE_BEGIN);
        cpu_buf->tracing = 1;
        return 1;
}


static void oprofile_end_trace(struct oprofile_cpu_buffer * cpu_buf)
{
        cpu_buf->tracing = 0;
}


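/*
 * Main entry point for samples, called from the profiling interrupt
 * (or NMI) handler.  If backtrace_depth is set we emit a
 * CPU_TRACE_BEGIN escape, log the sample, and let the backtrace
 * callback add the trace entries via oprofile_add_trace().
 */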
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
        struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
        unsigned long pc = profile_pc(regs);
        int is_kernel = !user_mode(regs);

        if (!backtrace_depth) {
                log_sample(cpu_buf, pc, is_kernel, event);
                return;
        }

        if (!oprofile_begin_trace(cpu_buf))
                return;

        /* If log_sample() fails we can't backtrace, since we lost the
         * source of this event. */
        if (log_sample(cpu_buf, pc, is_kernel, event))
                oprofile_ops.backtrace(regs, backtrace_depth);
        oprofile_end_trace(cpu_buf);
}


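/* Log a sample with an explicit pc and is_kernel, for callers that do
 * not have a struct pt_regs to hand. */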
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
        struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
        log_sample(cpu_buf, pc, is_kernel, event);
}


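/* Add one backtrace entry; only has effect between oprofile_begin_trace()
 * and oprofile_end_trace() on this CPU. */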
void oprofile_add_trace(unsigned long pc)
{
        struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];

        if (!cpu_buf->tracing)
                return;

        if (nr_available_slots(cpu_buf) < 1) {
                cpu_buf->tracing = 0;
                cpu_buf->sample_lost_overflow++;
                return;
        }

        /* A broken frame can give an eip with the same value as an
         * escape code; abort the trace if we get it. */
        if (pc == ESCAPE_CODE) {
                cpu_buf->tracing = 0;
                cpu_buf->backtrace_aborted++;
                return;
        }

        add_sample(cpu_buf, pc, 0);
}


/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu
 */
static void wq_sync_buffer(void * data)
{
        struct oprofile_cpu_buffer * b = data;
        if (b->cpu != smp_processor_id()) {
                printk("WQ on CPU%d, prefer CPU%d\n",
                        smp_processor_id(), b->cpu);
        }
        sync_buffer(b->cpu);

        /* don't re-add the work if we're shutting down */
        if (work_enabled)
                schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}