Diffstat (limited to 'drivers/oprofile')
-rw-r--r--  drivers/oprofile/buffer_sync.c | 209
-rw-r--r--  drivers/oprofile/cpu_buffer.c  |  74
-rw-r--r--  drivers/oprofile/cpu_buffer.h  |   2
3 files changed, 217 insertions, 68 deletions

diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 9304c4555079..ed982273fb8b 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -5,6 +5,7 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ * @author Barry Kasindorf
  *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
@@ -33,7 +34,7 @@
 #include "event_buffer.h"
 #include "cpu_buffer.h"
 #include "buffer_sync.h"
- 
+
 static LIST_HEAD(dying_tasks);
 static LIST_HEAD(dead_tasks);
 static cpumask_t marked_cpus = CPU_MASK_NONE;
@@ -48,10 +49,11 @@ static void process_task_mortuary(void);
  * Can be invoked from softirq via RCU callback due to
  * call_rcu() of the task struct, hence the _irqsave.
  */
-static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+task_free_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 	unsigned long flags;
-	struct task_struct * task = data;
+	struct task_struct *task = data;
 	spin_lock_irqsave(&task_mortuary, flags);
 	list_add(&task->tasks, &dying_tasks);
 	spin_unlock_irqrestore(&task_mortuary, flags);
@@ -62,13 +64,14 @@ static int task_free_notify(struct notifier_block * self, unsigned long val, voi
 /* The task is on its way out. A sync of the buffer means we can catch
  * any remaining samples for this task.
  */
-static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+task_exit_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 	/* To avoid latency problems, we only process the current CPU,
 	 * hoping that most samples for the task are on this CPU
 	 */
 	sync_buffer(raw_smp_processor_id());
 	return 0;
 }
 
 
@@ -77,11 +80,12 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi
  * we don't lose any. This does not have to be exact, it's a QoI issue
  * only.
  */
-static int munmap_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+munmap_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 	unsigned long addr = (unsigned long)data;
-	struct mm_struct * mm = current->mm;
-	struct vm_area_struct * mpnt;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *mpnt;
 
 	down_read(&mm->mmap_sem);
 
@@ -99,11 +103,12 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void *
 	return 0;
 }
 
 
 /* We need to be told about new modules so we don't attribute to a previously
  * loaded module, or drop the samples on the floor.
  */
-static int module_load_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+module_load_notify(struct notifier_block *self, unsigned long val, void *data)
 {
 #ifdef CONFIG_MODULES
 	if (val != MODULE_STATE_COMING)
@@ -118,7 +123,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v
 	return 0;
 }
 
- 
+
 static struct notifier_block task_free_nb = {
 	.notifier_call = task_free_notify,
 };
@@ -135,7 +140,7 @@ static struct notifier_block module_load_nb = {
 	.notifier_call = module_load_notify,
 };
 
- 
+
 static void end_sync(void)
 {
 	end_cpu_work();
@@ -208,14 +213,14 @@ static inline unsigned long fast_get_dcookie(struct path *path)
  * not strictly necessary but allows oprofile to associate
  * shared-library samples with particular applications
  */
-static unsigned long get_exec_dcookie(struct mm_struct * mm)
+static unsigned long get_exec_dcookie(struct mm_struct *mm)
 {
 	unsigned long cookie = NO_COOKIE;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 
 	if (!mm)
 		goto out;
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (!vma->vm_file)
 			continue;
@@ -235,13 +240,14 @@ out:
  * sure to do this lookup before a mm->mmap modification happens so
  * we don't lose track.
  */
-static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
+static unsigned long
+lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
 {
 	unsigned long cookie = NO_COOKIE;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 
 	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
 
 		if (addr < vma->vm_start || addr >= vma->vm_end)
 			continue;
 
@@ -263,9 +269,20 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o
 	return cookie;
 }
 
+static void increment_tail(struct oprofile_cpu_buffer *b)
+{
+	unsigned long new_tail = b->tail_pos + 1;
+
+	rmb();	/* be sure fifo pointers are synchronized */
+
+	if (new_tail < b->buffer_size)
+		b->tail_pos = new_tail;
+	else
+		b->tail_pos = 0;
+}
 
 static unsigned long last_cookie = INVALID_COOKIE;
 
 static void add_cpu_switch(int i)
 {
 	add_event_entry(ESCAPE_CODE);
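
The increment_tail() helper moves up here, ahead of its new IBS caller further down in this file; the rmb() keeps the read of the slot contents from being reordered against the fifo pointers. A minimal sketch of the drain pattern it supports, using only structures this file already has (op_sample slots, head_pos advanced by the interrupt-side producer, tail_pos owned by the sync side); consume() is a hypothetical stand-in for the real event-buffer logic:

	unsigned long available = get_slots(cpu_buf);	/* snapshot of head - tail */
	unsigned long i;

	for (i = 0; i < available; i++) {
		struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];

		consume(s->eip, s->event);	/* hypothetical per-slot handler */
		increment_tail(cpu_buf);	/* advance, wrapping to 0 at buffer_size */
	}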
@@ -278,16 +295,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel)
 {
 	add_event_entry(ESCAPE_CODE);
 	if (in_kernel)
 		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
 	else
 		add_event_entry(KERNEL_EXIT_SWITCH_CODE);
 }
 
 static void
-add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+add_user_ctx_switch(struct task_struct const *task, unsigned long cookie)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(CTX_SWITCH_CODE);
 	add_event_entry(task->pid);
 	add_event_entry(cookie);
 	/* Another code for daemon back-compat */
@@ -296,7 +313,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
 	add_event_entry(task->tgid);
 }
 
- 
+
 static void add_cookie_switch(unsigned long cookie)
 {
 	add_event_entry(ESCAPE_CODE);
@@ -304,13 +321,78 @@ static void add_cookie_switch(unsigned long cookie)
 	add_event_entry(cookie);
 }
 
 
 static void add_trace_begin(void)
 {
 	add_event_entry(ESCAPE_CODE);
 	add_event_entry(TRACE_BEGIN_CODE);
 }
 
+#ifdef CONFIG_OPROFILE_IBS
+
+#define IBS_FETCH_CODE_SIZE	2
+#define IBS_OP_CODE_SIZE	5
+#define IBS_EIP(offset)				\
+	(((struct op_sample *)&cpu_buf->buffer[(offset)])->eip)
+#define IBS_EVENT(offset)			\
+	(((struct op_sample *)&cpu_buf->buffer[(offset)])->event)
+
+/*
+ * Add IBS fetch and op entries to event buffer
+ */
+static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
+	int in_kernel, struct mm_struct *mm)
+{
+	unsigned long rip;
+	int i, count;
+	unsigned long ibs_cookie = 0;
+	off_t offset;
+
+	increment_tail(cpu_buf);	/* move to RIP entry */
+
+	rip = IBS_EIP(cpu_buf->tail_pos);
+
+#ifdef __LP64__
+	rip += IBS_EVENT(cpu_buf->tail_pos) << 32;
+#endif
+
+	if (mm) {
+		ibs_cookie = lookup_dcookie(mm, rip, &offset);
+
+		if (ibs_cookie == NO_COOKIE)
+			offset = rip;
+		if (ibs_cookie == INVALID_COOKIE) {
+			atomic_inc(&oprofile_stats.sample_lost_no_mapping);
+			offset = rip;
+		}
+		if (ibs_cookie != last_cookie) {
+			add_cookie_switch(ibs_cookie);
+			last_cookie = ibs_cookie;
+		}
+	} else
+		offset = rip;
+
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(code);
+	add_event_entry(offset);	/* offset from Dcookie */
+
+	/* we send the Dcookie offset, but also send the raw linear address */
+	add_event_entry(IBS_EIP(cpu_buf->tail_pos));
+	add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
+
+	if (code == IBS_FETCH_CODE)
+		count = IBS_FETCH_CODE_SIZE;	/* IBS fetch is 2 int64s */
+	else
+		count = IBS_OP_CODE_SIZE;	/* IBS op is 5 int64s */
+
+	for (i = 0; i < count; i++) {
+		increment_tail(cpu_buf);
+		add_event_entry(IBS_EIP(cpu_buf->tail_pos));
+		add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
+	}
+}
+
+#endif
 
 static void add_sample_entry(unsigned long offset, unsigned long event)
 {
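
The macros and add_ibs_begin() together fix how one IBS record spans consecutive CPU-buffer slots. A sketch of that layout, as implied by this hunk (each slot is one op_sample, i.e. an eip/event pair; the helper name ibs_record_slots() is invented here for illustration):

	/*
	 * slot n      eip = ESCAPE_CODE     event = IBS_FETCH_BEGIN or IBS_OP_BEGIN
	 * slot n+1    eip = RIP, low half   event = RIP, high half (used on __LP64__)
	 * slot n+2..  IBS_FETCH_CODE_SIZE (2) or IBS_OP_CODE_SIZE (5) further
	 *             slots of raw IBS register data, forwarded verbatim
	 */
	static unsigned long ibs_record_slots(int code)
	{
		/* escape slot + RIP slot + payload slots */
		return 2 + (code == IBS_FETCH_CODE ? IBS_FETCH_CODE_SIZE
						   : IBS_OP_CODE_SIZE);
	}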
@@ -319,13 +401,13 @@ static void add_sample_entry(unsigned long offset, unsigned long event)
 }
 
 
-static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
+static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
 {
 	unsigned long cookie;
 	off_t offset;
 
 	cookie = lookup_dcookie(mm, s->eip, &offset);
 
 	if (cookie == INVALID_COOKIE) {
 		atomic_inc(&oprofile_stats.sample_lost_no_mapping);
 		return 0;
@@ -341,13 +423,13 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
 	return 1;
 }
 
 
 /* Add a sample to the global event buffer. If possible the
  * sample is converted into a persistent dentry/offset pair
  * for later lookup from userspace.
  */
 static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
 {
 	if (in_kernel) {
 		add_sample_entry(s->eip, s->event);
@@ -359,9 +441,9 @@ add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
 	}
 	return 0;
 }
- 
 
-static void release_mm(struct mm_struct * mm)
+
+static void release_mm(struct mm_struct *mm)
 {
 	if (!mm)
 		return;
@@ -370,9 +452,9 @@ static void release_mm(struct mm_struct * mm)
 
 
 
-static struct mm_struct * take_tasks_mm(struct task_struct * task)
+static struct mm_struct *take_tasks_mm(struct task_struct *task)
 {
-	struct mm_struct * mm = get_task_mm(task);
+	struct mm_struct *mm = get_task_mm(task);
 	if (mm)
 		down_read(&mm->mmap_sem);
 	return mm;
@@ -383,10 +465,10 @@ static inline int is_code(unsigned long val)
 {
 	return val == ESCAPE_CODE;
 }
 
 
 /* "acquire" as many cpu buffer slots as we can */
-static unsigned long get_slots(struct oprofile_cpu_buffer * b)
+static unsigned long get_slots(struct oprofile_cpu_buffer *b)
 {
 	unsigned long head = b->head_pos;
 	unsigned long tail = b->tail_pos;
@@ -412,19 +494,6 @@ static unsigned long get_slots(struct oprofile_cpu_buffer * b)
 
 
 
-static void increment_tail(struct oprofile_cpu_buffer * b)
-{
-	unsigned long new_tail = b->tail_pos + 1;
-
-	rmb();
-
-	if (new_tail < b->buffer_size)
-		b->tail_pos = new_tail;
-	else
-		b->tail_pos = 0;
-}
-
-
 /* Move tasks along towards death. Any tasks on dead_tasks
  * will definitely have no remaining references in any
  * CPU buffers at this point, because we use two lists,
@@ -435,8 +504,8 @@ static void process_task_mortuary(void)
 {
 	unsigned long flags;
 	LIST_HEAD(local_dead_tasks);
-	struct task_struct * task;
-	struct task_struct * ttask;
+	struct task_struct *task;
+	struct task_struct *ttask;
 
 	spin_lock_irqsave(&task_mortuary, flags);
 
@@ -493,7 +562,7 @@ void sync_buffer(int cpu)
 {
 	struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
 	struct mm_struct *mm = NULL;
-	struct task_struct * new;
+	struct task_struct *new;
 	unsigned long cookie = 0;
 	int in_kernel = 1;
 	unsigned int i;
@@ -501,7 +570,7 @@ void sync_buffer(int cpu)
 	unsigned long available;
 
 	mutex_lock(&buffer_mutex);
- 
+
 	add_cpu_switch(cpu);
 
 	/* Remember, only we can modify tail_pos */
@@ -509,8 +578,8 @@ void sync_buffer(int cpu)
 	available = get_slots(cpu_buf);
 
 	for (i = 0; i < available; ++i) {
-		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
+		struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];
 
 		if (is_code(s->eip)) {
 			if (s->event <= CPU_IS_KERNEL) {
 				/* kernel/userspace switch */
@@ -521,8 +590,18 @@ void sync_buffer(int cpu)
 			} else if (s->event == CPU_TRACE_BEGIN) {
 				state = sb_bt_start;
 				add_trace_begin();
+#ifdef CONFIG_OPROFILE_IBS
+			} else if (s->event == IBS_FETCH_BEGIN) {
+				state = sb_bt_start;
+				add_ibs_begin(cpu_buf,
+					IBS_FETCH_CODE, in_kernel, mm);
+			} else if (s->event == IBS_OP_BEGIN) {
+				state = sb_bt_start;
+				add_ibs_begin(cpu_buf,
+					IBS_OP_CODE, in_kernel, mm);
+#endif
 			} else {
-				struct mm_struct * oldmm = mm;
+				struct mm_struct *oldmm = mm;
 
 				/* userspace context switch */
 				new = (struct task_struct *)s->event;
@@ -533,13 +612,11 @@ void sync_buffer(int cpu)
 				cookie = get_exec_dcookie(mm);
 				add_user_ctx_switch(new, cookie);
 			}
-		} else {
-			if (state >= sb_bt_start &&
-			    !add_sample(mm, s, in_kernel)) {
-				if (state == sb_bt_start) {
-					state = sb_bt_ignore;
-					atomic_inc(&oprofile_stats.bt_lost_no_mapping);
-				}
+		} else if (state >= sb_bt_start &&
+			   !add_sample(mm, s, in_kernel)) {
+			if (state == sb_bt_start) {
+				state = sb_bt_ignore;
+				atomic_inc(&oprofile_stats.bt_lost_no_mapping);
 			}
 		}
 
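
Folding the trailing else block into an else-if flattens one indentation level and lets the escape-code dispatch in sync_buffer() read as a single chain. Condensed, the control flow now looks roughly like this (a paraphrase of the hunks above, not the patch text; the state bookkeeping of the first branch and the mm handling are abbreviated):

	if (is_code(s->eip)) {
		if (s->event <= CPU_IS_KERNEL) {
			in_kernel = s->event;	/* kernel/userspace switch */
			add_kernel_ctx_switch(s->event);
		} else if (s->event == CPU_TRACE_BEGIN) {
			add_trace_begin();
#ifdef CONFIG_OPROFILE_IBS
		} else if (s->event == IBS_FETCH_BEGIN) {
			add_ibs_begin(cpu_buf, IBS_FETCH_CODE, in_kernel, mm);
		} else if (s->event == IBS_OP_BEGIN) {
			add_ibs_begin(cpu_buf, IBS_OP_CODE, in_kernel, mm);
#endif
		} else {
			/* any other value is a task pointer: user context switch */
		}
	} else if (state >= sb_bt_start && !add_sample(mm, s, in_kernel)) {
		if (state == sb_bt_start) {
			state = sb_bt_ignore;
			atomic_inc(&oprofile_stats.bt_lost_no_mapping);
		}
	}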

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 7ba78e6d210e..e1bd5a937f6c 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -5,6 +5,7 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
  *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
@@ -209,7 +210,7 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
 	return 1;
 }
 
-static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
+static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
 {
 	if (nr_available_slots(cpu_buf) < 4) {
 		cpu_buf->sample_lost_overflow++;
@@ -254,6 +255,75 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 	oprofile_add_ext_sample(pc, regs, event, is_kernel);
 }
 
+#ifdef CONFIG_OPROFILE_IBS
+
+#define MAX_IBS_SAMPLE_SIZE 14
+static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf,
+	unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code)
+{
+	struct task_struct *task;
+
+	cpu_buf->sample_received++;
+
+	if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
+		cpu_buf->sample_lost_overflow++;
+		return 0;
+	}
+
+	is_kernel = !!is_kernel;
+
+	/* notice a switch from user->kernel or vice versa */
+	if (cpu_buf->last_is_kernel != is_kernel) {
+		cpu_buf->last_is_kernel = is_kernel;
+		add_code(cpu_buf, is_kernel);
+	}
+
+	/* notice a task switch */
+	if (!is_kernel) {
+		task = current;
+
+		if (cpu_buf->last_task != task) {
+			cpu_buf->last_task = task;
+			add_code(cpu_buf, (unsigned long)task);
+		}
+	}
+
+	add_code(cpu_buf, ibs_code);
+	add_sample(cpu_buf, ibs[0], ibs[1]);
+	add_sample(cpu_buf, ibs[2], ibs[3]);
+	add_sample(cpu_buf, ibs[4], ibs[5]);
+
+	if (ibs_code == IBS_OP_BEGIN) {
+		add_sample(cpu_buf, ibs[6], ibs[7]);
+		add_sample(cpu_buf, ibs[8], ibs[9]);
+		add_sample(cpu_buf, ibs[10], ibs[11]);
+	}
+
+	return 1;
+}
+
+void oprofile_add_ibs_sample(struct pt_regs *const regs,
+				unsigned int *const ibs_sample, u8 code)
+{
+	int is_kernel = !user_mode(regs);
+	unsigned long pc = profile_pc(regs);
+
+	struct oprofile_cpu_buffer *cpu_buf =
+		&per_cpu(cpu_buffer, smp_processor_id());
+
+	if (!backtrace_depth) {
+		log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code);
+		return;
+	}
+
+	/* if log_ibs_sample() fails we can't backtrace since we lost the
+	 * source of this event */
+	if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code))
+		oprofile_ops.backtrace(regs, backtrace_depth);
+}
+
+#endif
+
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
 {
 	struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
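
oprofile_add_ibs_sample() is the entry point an architecture driver calls from its interrupt/NMI handler; MAX_IBS_SAMPLE_SIZE (14) reserves room for the escape codes plus the six (fetch) or twelve (op) 32-bit register halves. A hypothetical fetch-side caller, assuming the AMD IBS MSR names from msr-index.h; the ordering of the halves in the array is illustrative and must match whatever the userspace daemon expects:

	unsigned int ibs_fetch[6];	/* three 64-bit IBS fetch MSRs, as low/high pairs */

	rdmsr(MSR_AMD64_IBSFETCHCTL,    ibs_fetch[0], ibs_fetch[1]);
	rdmsr(MSR_AMD64_IBSFETCHLINAD,  ibs_fetch[2], ibs_fetch[3]);
	rdmsr(MSR_AMD64_IBSFETCHPHYSAD, ibs_fetch[4], ibs_fetch[5]);

	oprofile_add_ibs_sample(regs, ibs_fetch, IBS_FETCH_BEGIN);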
@@ -296,7 +366,7 @@ static void wq_sync_buffer(struct work_struct *work)
 	struct oprofile_cpu_buffer * b =
 		container_of(work, struct oprofile_cpu_buffer, work.work);
 	if (b->cpu != smp_processor_id()) {
-		printk("WQ on CPU%d, prefer CPU%d\n",
+		printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
 		       smp_processor_id(), b->cpu);
 	}
 	sync_buffer(b->cpu);

diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index c3e366b52261..9c44d004da69 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -55,5 +55,7 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
 /* transient events for the CPU buffer -> event buffer */
 #define CPU_IS_KERNEL 1
 #define CPU_TRACE_BEGIN 2
+#define IBS_FETCH_BEGIN 3
+#define IBS_OP_BEGIN 4
 
 #endif /* OPROFILE_CPU_BUFFER_H */
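
The two new codes extend the transient-event namespace shared by the producer (cpu_buffer.c, via add_code()) and the consumer (buffer_sync.c). The scheme relies on an invariant: a slot whose eip is ESCAPE_CODE carries either one of these small integers or a task_struct pointer in its event field, and kernel pointers can never collide with values this small. A sketch of that check, with is_transient_code() as an invented name:

	static int is_transient_code(unsigned long event)
	{
		/* 0 and 1 encode the user/kernel flag; 2..4 are the codes above */
		return event <= IBS_OP_BEGIN;
	}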