diff options
author | Barry Kasindorf <barry.kasindorf@amd.com> | 2008-07-22 15:08:54 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-26 05:48:04 -0400 |
commit | 345c25730d085c45622ac779da4dbd97dc3a10fe (patch) | |
tree | 12f31ee55b97ef9604b37990e05c523178c32901 /drivers | |
parent | ee648bc77f11b57d15a68d336fc30e343198f893 (diff) |
x86/oprofile: add IBS support for AMD CPUs, IBS buffer handling routines
This patchset supports the new profiling hardware available in the
latest AMD CPUs in the oProfile driver.
Signed-off-by: Barry Kasindorf <barry.kasindorf@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Cc: oprofile-list <oprofile-list@lists.sourceforge.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/oprofile/buffer_sync.c | 72 | ||||
-rw-r--r-- | drivers/oprofile/cpu_buffer.c | 68 | ||||
-rw-r--r-- | drivers/oprofile/cpu_buffer.h | 2 |
3 files changed, 140 insertions, 2 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 615929f6f0c2..e1782d2df09f 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * @remark Read the file COPYING | 5 | * @remark Read the file COPYING |
6 | * | 6 | * |
7 | * @author John Levon <levon@movementarian.org> | 7 | * @author John Levon <levon@movementarian.org> |
8 | * @author Barry Kasindorf | ||
8 | * | 9 | * |
9 | * This is the core of the buffer management. Each | 10 | * This is the core of the buffer management. Each |
10 | * CPU buffer is processed and entered into the | 11 | * CPU buffer is processed and entered into the |
@@ -272,7 +273,7 @@ static void increment_tail(struct oprofile_cpu_buffer *b) | |||
272 | { | 273 | { |
273 | unsigned long new_tail = b->tail_pos + 1; | 274 | unsigned long new_tail = b->tail_pos + 1; |
274 | 275 | ||
275 | rmb(); | 276 | rmb(); /* be sure fifo pointers are synchronized */ |
276 | 277 | ||
277 | if (new_tail < b->buffer_size) | 278 | if (new_tail < b->buffer_size) |
278 | b->tail_pos = new_tail; | 279 | b->tail_pos = new_tail; |
@@ -327,6 +328,67 @@ static void add_trace_begin(void) | |||
327 | add_event_entry(TRACE_BEGIN_CODE); | 328 | add_event_entry(TRACE_BEGIN_CODE); |
328 | } | 329 | } |
329 | 330 | ||
331 | #define IBS_FETCH_CODE_SIZE 2 | ||
332 | #define IBS_OP_CODE_SIZE 5 | ||
333 | #define IBS_EIP(offset) \ | ||
334 | (((struct op_sample *)&cpu_buf->buffer[(offset)])->eip) | ||
335 | #define IBS_EVENT(offset) \ | ||
336 | (((struct op_sample *)&cpu_buf->buffer[(offset)])->event) | ||
337 | |||
338 | /* | ||
339 | * Add IBS fetch and op entries to event buffer | ||
340 | */ | ||
341 | static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code, | ||
342 | int in_kernel, struct mm_struct *mm) | ||
343 | { | ||
344 | unsigned long rip; | ||
345 | int i, count; | ||
346 | unsigned long ibs_cookie = 0; | ||
347 | off_t offset; | ||
348 | |||
349 | increment_tail(cpu_buf); /* move to RIP entry */ | ||
350 | |||
351 | rip = IBS_EIP(cpu_buf->tail_pos); | ||
352 | |||
353 | #ifdef __LP64__ | ||
354 | rip += IBS_EVENT(cpu_buf->tail_pos) << 32; | ||
355 | #endif | ||
356 | |||
357 | if (mm) { | ||
358 | ibs_cookie = lookup_dcookie(mm, rip, &offset); | ||
359 | |||
360 | if (ibs_cookie == NO_COOKIE) | ||
361 | offset = rip; | ||
362 | if (ibs_cookie == INVALID_COOKIE) { | ||
363 | atomic_inc(&oprofile_stats.sample_lost_no_mapping); | ||
364 | offset = rip; | ||
365 | } | ||
366 | if (ibs_cookie != last_cookie) { | ||
367 | add_cookie_switch(ibs_cookie); | ||
368 | last_cookie = ibs_cookie; | ||
369 | } | ||
370 | } else | ||
371 | offset = rip; | ||
372 | |||
373 | add_event_entry(ESCAPE_CODE); | ||
374 | add_event_entry(code); | ||
375 | add_event_entry(offset); /* Offset from Dcookie */ | ||
376 | |||
377 | /* we send the Dcookie offset, but also send the raw linear address */ | ||
378 | add_event_entry(IBS_EIP(cpu_buf->tail_pos)); | ||
379 | add_event_entry(IBS_EVENT(cpu_buf->tail_pos)); | ||
380 | |||
381 | if (code == IBS_FETCH_CODE) | ||
382 | count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ | ||
383 | else | ||
384 | count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ | ||
385 | |||
386 | for (i = 0; i < count; i++) { | ||
387 | increment_tail(cpu_buf); | ||
388 | add_event_entry(IBS_EIP(cpu_buf->tail_pos)); | ||
389 | add_event_entry(IBS_EVENT(cpu_buf->tail_pos)); | ||
390 | } | ||
391 | } | ||
330 | 392 | ||
331 | static void add_sample_entry(unsigned long offset, unsigned long event) | 393 | static void add_sample_entry(unsigned long offset, unsigned long event) |
332 | { | 394 | { |
@@ -524,6 +586,14 @@ void sync_buffer(int cpu) | |||
524 | } else if (s->event == CPU_TRACE_BEGIN) { | 586 | } else if (s->event == CPU_TRACE_BEGIN) { |
525 | state = sb_bt_start; | 587 | state = sb_bt_start; |
526 | add_trace_begin(); | 588 | add_trace_begin(); |
589 | } else if (s->event == IBS_FETCH_BEGIN) { | ||
590 | state = sb_bt_start; | ||
591 | add_ibs_begin(cpu_buf, | ||
592 | IBS_FETCH_CODE, in_kernel, mm); | ||
593 | } else if (s->event == IBS_OP_BEGIN) { | ||
594 | state = sb_bt_start; | ||
595 | add_ibs_begin(cpu_buf, | ||
596 | IBS_OP_CODE, in_kernel, mm); | ||
527 | } else { | 597 | } else { |
528 | struct mm_struct *oldmm = mm; | 598 | struct mm_struct *oldmm = mm; |
529 | 599 | ||
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 2450b3a393ff..c9ac4e156918 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * @remark Read the file COPYING | 5 | * @remark Read the file COPYING |
6 | * | 6 | * |
7 | * @author John Levon <levon@movementarian.org> | 7 | * @author John Levon <levon@movementarian.org> |
8 | * @author Barry Kasindorf <barry.kasindorf@amd.com> | ||
8 | * | 9 | * |
9 | * Each CPU has a local buffer that stores PC value/event | 10 | * Each CPU has a local buffer that stores PC value/event |
10 | * pairs. We also log context switches when we notice them. | 11 | * pairs. We also log context switches when we notice them. |
@@ -207,7 +208,7 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, | |||
207 | return 1; | 208 | return 1; |
208 | } | 209 | } |
209 | 210 | ||
210 | static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf) | 211 | static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) |
211 | { | 212 | { |
212 | if (nr_available_slots(cpu_buf) < 4) { | 213 | if (nr_available_slots(cpu_buf) < 4) { |
213 | cpu_buf->sample_lost_overflow++; | 214 | cpu_buf->sample_lost_overflow++; |
@@ -252,6 +253,71 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) | |||
252 | oprofile_add_ext_sample(pc, regs, event, is_kernel); | 253 | oprofile_add_ext_sample(pc, regs, event, is_kernel); |
253 | } | 254 | } |
254 | 255 | ||
256 | #define MAX_IBS_SAMPLE_SIZE 14 | ||
257 | static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf, | ||
258 | unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code) | ||
259 | { | ||
260 | struct task_struct *task; | ||
261 | |||
262 | cpu_buf->sample_received++; | ||
263 | |||
264 | if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) { | ||
265 | cpu_buf->sample_lost_overflow++; | ||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | is_kernel = !!is_kernel; | ||
270 | |||
271 | /* notice a switch from user->kernel or vice versa */ | ||
272 | if (cpu_buf->last_is_kernel != is_kernel) { | ||
273 | cpu_buf->last_is_kernel = is_kernel; | ||
274 | add_code(cpu_buf, is_kernel); | ||
275 | } | ||
276 | |||
277 | /* notice a task switch */ | ||
278 | if (!is_kernel) { | ||
279 | task = current; | ||
280 | |||
281 | if (cpu_buf->last_task != task) { | ||
282 | cpu_buf->last_task = task; | ||
283 | add_code(cpu_buf, (unsigned long)task); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | add_code(cpu_buf, ibs_code); | ||
288 | add_sample(cpu_buf, ibs[0], ibs[1]); | ||
289 | add_sample(cpu_buf, ibs[2], ibs[3]); | ||
290 | add_sample(cpu_buf, ibs[4], ibs[5]); | ||
291 | |||
292 | if (ibs_code == IBS_OP_BEGIN) { | ||
293 | add_sample(cpu_buf, ibs[6], ibs[7]); | ||
294 | add_sample(cpu_buf, ibs[8], ibs[9]); | ||
295 | add_sample(cpu_buf, ibs[10], ibs[11]); | ||
296 | } | ||
297 | |||
298 | return 1; | ||
299 | } | ||
300 | |||
301 | void oprofile_add_ibs_sample(struct pt_regs *const regs, | ||
302 | unsigned int * const ibs_sample, u8 code) | ||
303 | { | ||
304 | int is_kernel = !user_mode(regs); | ||
305 | unsigned long pc = profile_pc(regs); | ||
306 | |||
307 | struct oprofile_cpu_buffer *cpu_buf = | ||
308 | &per_cpu(cpu_buffer, smp_processor_id()); | ||
309 | |||
310 | if (!backtrace_depth) { | ||
311 | log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code); | ||
312 | return; | ||
313 | } | ||
314 | |||
315 | /* if log_ibs_sample() fails we can't backtrace since we lost the source | ||
316 | * of this event */ | ||
317 | if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code)) | ||
318 | oprofile_ops.backtrace(regs, backtrace_depth); | ||
319 | } | ||
320 | |||
255 | void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) | 321 | void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) |
256 | { | 322 | { |
257 | struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); | 323 | struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); |
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index c3e366b52261..9c44d004da69 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h | |||
@@ -55,5 +55,7 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); | |||
55 | /* transient events for the CPU buffer -> event buffer */ | 55 | /* transient events for the CPU buffer -> event buffer */ |
56 | #define CPU_IS_KERNEL 1 | 56 | #define CPU_IS_KERNEL 1 |
57 | #define CPU_TRACE_BEGIN 2 | 57 | #define CPU_TRACE_BEGIN 2 |
58 | #define IBS_FETCH_BEGIN 3 | ||
59 | #define IBS_OP_BEGIN 4 | ||
58 | 60 | ||
59 | #endif /* OPROFILE_CPU_BUFFER_H */ | 61 | #endif /* OPROFILE_CPU_BUFFER_H */ |