aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBarry Kasindorf <barry.kasindorf@amd.com>2008-07-22 15:08:54 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-26 05:48:04 -0400
commit345c25730d085c45622ac779da4dbd97dc3a10fe (patch)
tree12f31ee55b97ef9604b37990e05c523178c32901
parentee648bc77f11b57d15a68d336fc30e343198f893 (diff)
x86/oprofile: add IBS support for AMD CPUs, IBS buffer handling routines
This patchset supports the new profiling hardware available in the latest AMD CPUs in the oProfile driver. Signed-off-by: Barry Kasindorf <barry.kasindorf@amd.com> Signed-off-by: Robert Richter <robert.richter@amd.com> Cc: oprofile-list <oprofile-list@lists.sourceforge.net> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--drivers/oprofile/buffer_sync.c72
-rw-r--r--drivers/oprofile/cpu_buffer.c68
-rw-r--r--drivers/oprofile/cpu_buffer.h2
3 files changed, 140 insertions, 2 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 615929f6f0c2..e1782d2df09f 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -5,6 +5,7 @@
5 * @remark Read the file COPYING 5 * @remark Read the file COPYING
6 * 6 *
7 * @author John Levon <levon@movementarian.org> 7 * @author John Levon <levon@movementarian.org>
8 * @author Barry Kasindorf
8 * 9 *
9 * This is the core of the buffer management. Each 10 * This is the core of the buffer management. Each
10 * CPU buffer is processed and entered into the 11 * CPU buffer is processed and entered into the
@@ -272,7 +273,7 @@ static void increment_tail(struct oprofile_cpu_buffer *b)
272{ 273{
273 unsigned long new_tail = b->tail_pos + 1; 274 unsigned long new_tail = b->tail_pos + 1;
274 275
275 rmb(); 276 rmb(); /* be sure fifo pointers are synchronized */
276 277
277 if (new_tail < b->buffer_size) 278 if (new_tail < b->buffer_size)
278 b->tail_pos = new_tail; 279 b->tail_pos = new_tail;
@@ -327,6 +328,67 @@ static void add_trace_begin(void)
327 add_event_entry(TRACE_BEGIN_CODE); 328 add_event_entry(TRACE_BEGIN_CODE);
328} 329}
329 330
331#define IBS_FETCH_CODE_SIZE 2
332#define IBS_OP_CODE_SIZE 5
333#define IBS_EIP(offset) \
334 (((struct op_sample *)&cpu_buf->buffer[(offset)])->eip)
335#define IBS_EVENT(offset) \
336 (((struct op_sample *)&cpu_buf->buffer[(offset)])->event)
337
338/*
339 * Add IBS fetch and op entries to event buffer
340 */
341static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
342 int in_kernel, struct mm_struct *mm)
343{
344 unsigned long rip;
345 int i, count;
346 unsigned long ibs_cookie = 0;
347 off_t offset;
348
349 increment_tail(cpu_buf); /* move to RIP entry */
350
351 rip = IBS_EIP(cpu_buf->tail_pos);
352
353#ifdef __LP64__
354 rip += IBS_EVENT(cpu_buf->tail_pos) << 32;
355#endif
356
357 if (mm) {
358 ibs_cookie = lookup_dcookie(mm, rip, &offset);
359
360 if (ibs_cookie == NO_COOKIE)
361 offset = rip;
362 if (ibs_cookie == INVALID_COOKIE) {
363 atomic_inc(&oprofile_stats.sample_lost_no_mapping);
364 offset = rip;
365 }
366 if (ibs_cookie != last_cookie) {
367 add_cookie_switch(ibs_cookie);
368 last_cookie = ibs_cookie;
369 }
370 } else
371 offset = rip;
372
373 add_event_entry(ESCAPE_CODE);
374 add_event_entry(code);
375 add_event_entry(offset); /* Offset from Dcookie */
376
377 /* we send the Dcookie offset, but send the raw Linear Add also */
378 add_event_entry(IBS_EIP(cpu_buf->tail_pos));
379 add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
380
381 if (code == IBS_FETCH_CODE)
382 count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
383 else
384 count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
385
386 for (i = 0; i < count; i++) {
387 increment_tail(cpu_buf);
388 add_event_entry(IBS_EIP(cpu_buf->tail_pos));
389 add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
390 }
391}
330 392
331static void add_sample_entry(unsigned long offset, unsigned long event) 393static void add_sample_entry(unsigned long offset, unsigned long event)
332{ 394{
@@ -524,6 +586,14 @@ void sync_buffer(int cpu)
524 } else if (s->event == CPU_TRACE_BEGIN) { 586 } else if (s->event == CPU_TRACE_BEGIN) {
525 state = sb_bt_start; 587 state = sb_bt_start;
526 add_trace_begin(); 588 add_trace_begin();
589 } else if (s->event == IBS_FETCH_BEGIN) {
590 state = sb_bt_start;
591 add_ibs_begin(cpu_buf,
592 IBS_FETCH_CODE, in_kernel, mm);
593 } else if (s->event == IBS_OP_BEGIN) {
594 state = sb_bt_start;
595 add_ibs_begin(cpu_buf,
596 IBS_OP_CODE, in_kernel, mm);
527 } else { 597 } else {
528 struct mm_struct *oldmm = mm; 598 struct mm_struct *oldmm = mm;
529 599
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 2450b3a393ff..c9ac4e156918 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -5,6 +5,7 @@
5 * @remark Read the file COPYING 5 * @remark Read the file COPYING
6 * 6 *
7 * @author John Levon <levon@movementarian.org> 7 * @author John Levon <levon@movementarian.org>
8 * @author Barry Kasindorf <barry.kasindorf@amd.com>
8 * 9 *
9 * Each CPU has a local buffer that stores PC value/event 10 * Each CPU has a local buffer that stores PC value/event
10 * pairs. We also log context switches when we notice them. 11 * pairs. We also log context switches when we notice them.
@@ -207,7 +208,7 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
207 return 1; 208 return 1;
208} 209}
209 210
210static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf) 211static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
211{ 212{
212 if (nr_available_slots(cpu_buf) < 4) { 213 if (nr_available_slots(cpu_buf) < 4) {
213 cpu_buf->sample_lost_overflow++; 214 cpu_buf->sample_lost_overflow++;
@@ -252,6 +253,71 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
252 oprofile_add_ext_sample(pc, regs, event, is_kernel); 253 oprofile_add_ext_sample(pc, regs, event, is_kernel);
253} 254}
254 255
256#define MAX_IBS_SAMPLE_SIZE 14
257static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf,
258 unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code)
259{
260 struct task_struct *task;
261
262 cpu_buf->sample_received++;
263
264 if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
265 cpu_buf->sample_lost_overflow++;
266 return 0;
267 }
268
269 is_kernel = !!is_kernel;
270
271 /* notice a switch from user->kernel or vice versa */
272 if (cpu_buf->last_is_kernel != is_kernel) {
273 cpu_buf->last_is_kernel = is_kernel;
274 add_code(cpu_buf, is_kernel);
275 }
276
277 /* notice a task switch */
278 if (!is_kernel) {
279 task = current;
280
281 if (cpu_buf->last_task != task) {
282 cpu_buf->last_task = task;
283 add_code(cpu_buf, (unsigned long)task);
284 }
285 }
286
287 add_code(cpu_buf, ibs_code);
288 add_sample(cpu_buf, ibs[0], ibs[1]);
289 add_sample(cpu_buf, ibs[2], ibs[3]);
290 add_sample(cpu_buf, ibs[4], ibs[5]);
291
292 if (ibs_code == IBS_OP_BEGIN) {
293 add_sample(cpu_buf, ibs[6], ibs[7]);
294 add_sample(cpu_buf, ibs[8], ibs[9]);
295 add_sample(cpu_buf, ibs[10], ibs[11]);
296 }
297
298 return 1;
299}
300
301void oprofile_add_ibs_sample(struct pt_regs *const regs,
302 unsigned int * const ibs_sample, u8 code)
303{
304 int is_kernel = !user_mode(regs);
305 unsigned long pc = profile_pc(regs);
306
307 struct oprofile_cpu_buffer *cpu_buf =
308 &per_cpu(cpu_buffer, smp_processor_id());
309
310 if (!backtrace_depth) {
311 log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code);
312 return;
313 }
314
315 /* if log_sample() fails we can't backtrace since we lost the source
316 * of this event */
317 if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code))
318 oprofile_ops.backtrace(regs, backtrace_depth);
319}
320
255void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) 321void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
256{ 322{
257 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 323 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index c3e366b52261..9c44d004da69 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -55,5 +55,7 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
55/* transient events for the CPU buffer -> event buffer */ 55/* transient events for the CPU buffer -> event buffer */
56#define CPU_IS_KERNEL 1 56#define CPU_IS_KERNEL 1
57#define CPU_TRACE_BEGIN 2 57#define CPU_TRACE_BEGIN 2
58#define IBS_FETCH_BEGIN 3
59#define IBS_OP_BEGIN 4
58 60
59#endif /* OPROFILE_CPU_BUFFER_H */ 61#endif /* OPROFILE_CPU_BUFFER_H */