author		Carl Love <cel@us.ibm.com>	2008-10-14 19:37:01 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2008-10-21 00:17:48 -0400
commit		a5598ca0d49821912a5053c05f07fd650671eb6d (patch)
tree		fcfa402eeb45f76fbb03886708e5042fe5f1babb
parent		bb5e6491cae4c5d6ddfa3e173e22efb35f595949 (diff)
powerpc/oprofile: Fix mutex locking for cell spu-oprofile
The issue is that the SPU code does not hold the kernel mutex lock while adding samples to the kernel buffer.

This patch creates per-SPU buffers to hold the data. Data is added to the buffers in interrupt context. The data is periodically pushed to the kernel buffer via a new OProfile function, oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue, enabling the function to acquire the mutex lock.

The existing user controls for adjusting the per-CPU buffer size are used to control the size of the per-SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per-CPU buffer stats. This eliminates the need for architecture-specific controls for the per-SPU buffers, which would not be acceptable to the OProfile user tool maintainer. The export of the OProfile add_event_entry() function is removed, as it is no longer needed with this patch.

Note: this patch does not address the issue of indexing arrays by the SPU number. This still needs to be fixed, as the SPU numbering is not guaranteed to be 0 to max_num_spus-1.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
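For context, the buffering scheme in this patch boils down to a single-producer circular buffer that always keeps one slot free, so head == tail unambiguously means "empty", plus a consumer that drains entries up to a snapshot of head. The user-space sketch below is illustrative only: the names ring_add, ring_drain, RING_SIZE and print_entry are made up for this example, and the kernel code additionally uses the buffer spinlock and the OProfile buffer mutex. It shows the same full/empty discipline used by spu_buff_add() and the same start-to-stop wrapping walk performed by oprofile_put_buff().

#include <stdio.h>

#define RING_SIZE 8	/* illustrative; the kernel sizes this from the per-CPU buffer size */

struct ring {
	unsigned long buf[RING_SIZE];
	unsigned int head;	/* next slot the producer writes */
	unsigned int tail;	/* next slot the consumer reads  */
};

/* Add one value; returns 0 on success, -1 if the ring is full.
 * One slot is always left unused, so head == tail means "empty"
 * and (head + 1) % size == tail means "full".
 */
static int ring_add(struct ring *r, unsigned long value)
{
	unsigned int next = (r->head + 1) % RING_SIZE;

	if (next == r->tail)
		return -1;	/* full: the kernel counts this as a lost sample */

	r->buf[r->head] = value;
	r->head = next;
	return 0;
}

/* Drain everything between tail and a snapshot of head, in order,
 * wrapping at the buffer size -- the same walk oprofile_put_buff()
 * performs from start to stop while holding the buffer mutex.
 */
static void ring_drain(struct ring *r, void (*emit)(unsigned long))
{
	unsigned int stop = r->head;	/* snapshot; producer may keep adding */

	while (r->tail != stop) {
		emit(r->buf[r->tail]);
		r->tail = (r->tail + 1) % RING_SIZE;
	}
}

static void print_entry(unsigned long v)
{
	printf("%lu\n", v);
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0 };
	unsigned long i;

	for (i = 0; i < 10; i++)
		if (ring_add(&r, i))
			fprintf(stderr, "dropped %lu (ring full)\n", i);

	ring_drain(&r, print_entry);
	return 0;
}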
-rw-r--r--  arch/powerpc/oprofile/cell/pr_util.h       |  13
-rw-r--r--  arch/powerpc/oprofile/cell/spu_profiler.c  |   4
-rw-r--r--  arch/powerpc/oprofile/cell/spu_task_sync.c | 236
-rw-r--r--  drivers/oprofile/buffer_sync.c             |  24
-rw-r--r--  drivers/oprofile/cpu_buffer.c              |  15
-rw-r--r--  drivers/oprofile/event_buffer.h            |   7
-rw-r--r--  include/linux/oprofile.h                   |  16
7 files changed, 279 insertions(+), 36 deletions(-)
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index 22e4e8d4eb2..628009c0195 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -24,6 +24,11 @@
 #define SKIP_GENERIC_SYNC 0
 #define SYNC_START_ERROR -1
 #define DO_GENERIC_SYNC 1
+#define SPUS_PER_NODE 8
+#define DEFAULT_TIMER_EXPIRE (HZ / 10)
+
+extern struct delayed_work spu_work;
+extern int spu_prof_running;
 
 struct spu_overlay_info {	/* map of sections within an SPU overlay */
 	unsigned int vma;	/* SPU virtual memory address from elf */
@@ -62,6 +67,14 @@ struct vma_to_fileoffset_map { /* map of sections within an SPU program */
 
 };
 
+struct spu_buffer {
+	int last_guard_val;
+	int ctx_sw_seen;
+	unsigned long *buff;
+	unsigned int head, tail;
+};
+
+
 /* The three functions below are for maintaining and accessing
  * the vma-to-fileoffset map.
  */
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index 380d7e21753..6edaebd5099 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -23,12 +23,11 @@
 
 static u32 *samples;
 
-static int spu_prof_running;
+int spu_prof_running;
 static unsigned int profiling_interval;
 
 #define NUM_SPU_BITS_TRBUF 16
 #define SPUS_PER_TB_ENTRY   4
-#define SPUS_PER_NODE	     8
 
 #define SPU_PC_MASK	     0xFFFF
 
@@ -208,6 +207,7 @@ int start_spu_profiling(unsigned int cycles_reset)
 
 	spu_prof_running = 1;
 	hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
+	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
 
 	return 0;
 }
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 2a9b4a04932..2949126d28d 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock);
 static DEFINE_SPINLOCK(cache_lock);
 static int num_spu_nodes;
 int spu_prof_num_nodes;
-int last_guard_val[MAX_NUMNODES * 8];
+
+struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
+struct delayed_work spu_work;
+static unsigned max_spu_buff;
+
+static void spu_buff_add(unsigned long int value, int spu)
+{
+	/* spu buff is a circular buffer.  Add entries to the
+	 * head.  Head is the index to store the next value.
+	 * The buffer is full when there is one available entry
+	 * in the queue, i.e. head and tail can't be equal.
+	 * That way we can tell the difference between the
+	 * buffer being full versus empty.
+	 *
+	 * ASSUMPTION: the buffer_lock is held when this function
+	 * is called to lock the buffer, head and tail.
+	 */
+	int full = 1;
+
+	if (spu_buff[spu].head >= spu_buff[spu].tail) {
+		if ((spu_buff[spu].head - spu_buff[spu].tail)
+		    < (max_spu_buff - 1))
+			full = 0;
+
+	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
+		if ((spu_buff[spu].tail - spu_buff[spu].head)
+		    > 1)
+			full = 0;
+	}
+
+	if (!full) {
+		spu_buff[spu].buff[spu_buff[spu].head] = value;
+		spu_buff[spu].head++;
+
+		if (spu_buff[spu].head >= max_spu_buff)
+			spu_buff[spu].head = 0;
+	} else {
+		/* From the user's perspective make the SPU buffer
+		 * size management/overflow look like we are using
+		 * per cpu buffers.  The user uses the same
+		 * per cpu parameter to adjust the SPU buffer size.
+		 * Increment the sample_lost_overflow to inform
+		 * the user the buffer size needs to be increased.
+		 */
+		oprofile_cpu_buffer_inc_smpl_lost();
+	}
+}
+
+/* This function copies the per SPU buffers to the
+ * OProfile kernel buffer.
+ */
+void sync_spu_buff(void)
+{
+	int spu;
+	unsigned long flags;
+	int curr_head;
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* In case there was an issue and the buffer didn't
+		 * get created skip it.
+		 */
+		if (spu_buff[spu].buff == NULL)
+			continue;
+
+		/* Hold the lock to make sure the head/tail
+		 * doesn't change while spu_buff_add() is
+		 * deciding if the buffer is full or not.
+		 * Being a little paranoid.
+		 */
+		spin_lock_irqsave(&buffer_lock, flags);
+		curr_head = spu_buff[spu].head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+
+		/* Transfer the current contents to the kernel buffer.
+		 * data can still be added to the head of the buffer.
+		 */
+		oprofile_put_buff(spu_buff[spu].buff,
+				  spu_buff[spu].tail,
+				  curr_head, max_spu_buff);
+
+		spin_lock_irqsave(&buffer_lock, flags);
+		spu_buff[spu].tail = curr_head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+	}
+
+}
+
+static void wq_sync_spu_buff(struct work_struct *work)
+{
+	/* move data from spu buffers to kernel buffer */
+	sync_spu_buff();
+
+	/* only reschedule if profiling is not done */
+	if (spu_prof_running)
+		schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+}
 
 /* Container for caching information about an active SPU task. */
 struct cached_info {
@@ -305,14 +400,21 @@ static int process_context_switch(struct spu *spu, unsigned long objectId)
 
 	/* Record context info in event buffer */
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_CTX_SWITCH_CODE);
-	add_event_entry(spu->number);
-	add_event_entry(spu->pid);
-	add_event_entry(spu->tgid);
-	add_event_entry(app_dcookie);
-	add_event_entry(spu_cookie);
-	add_event_entry(offset);
+	spu_buff_add(ESCAPE_CODE, spu->number);
+	spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
+	spu_buff_add(spu->number, spu->number);
+	spu_buff_add(spu->pid, spu->number);
+	spu_buff_add(spu->tgid, spu->number);
+	spu_buff_add(app_dcookie, spu->number);
+	spu_buff_add(spu_cookie, spu->number);
+	spu_buff_add(offset, spu->number);
+
+	/* Set flag to indicate SPU PC data can now be written out.  If
+	 * the SPU program counter data is seen before an SPU context
+	 * record is seen, the postprocessing will fail.
+	 */
+	spu_buff[spu->number].ctx_sw_seen = 1;
+
 	spin_unlock_irqrestore(&buffer_lock, flags);
 	smp_wmb();	/* insure spu event buffer updates are written */
 	/* don't want entries intermingled... */
@@ -360,6 +462,47 @@ static int number_of_online_nodes(void)
 	return nodes;
 }
 
+static int oprofile_spu_buff_create(void)
+{
+	int spu;
+
+	max_spu_buff = oprofile_get_cpu_buffer_size();
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* create circular buffers to store the data in.
+		 * use locks to manage accessing the buffers
+		 */
+		spu_buff[spu].head = 0;
+		spu_buff[spu].tail = 0;
+
+		/*
+		 * Create a buffer for each SPU.  Can't reliably
+		 * create a single buffer for all spus due to not
+		 * enough contiguous kernel memory.
+		 */
+
+		spu_buff[spu].buff = kzalloc((max_spu_buff
+					      * sizeof(unsigned long)),
+					     GFP_KERNEL);
+
+		if (!spu_buff[spu].buff) {
+			printk(KERN_ERR "SPU_PROF: "
+			       "%s, line %d: oprofile_spu_buff_create "
+			       "failed to allocate spu buffer %d.\n",
+			       __func__, __LINE__, spu);
+
+			/* release the spu buffers that have been allocated */
+			while (spu >= 0) {
+				kfree(spu_buff[spu].buff);
+				spu_buff[spu].buff = 0;
+				spu--;
+			}
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
 /* The main purpose of this function is to synchronize
  * OProfile with SPUFS by registering to be notified of
  * SPU task switches.
@@ -372,20 +515,35 @@ static int number_of_online_nodes(void)
  */
 int spu_sync_start(void)
 {
-	int k;
+	int spu;
 	int ret = SKIP_GENERIC_SYNC;
 	int register_ret;
 	unsigned long flags = 0;
 
 	spu_prof_num_nodes = number_of_online_nodes();
 	num_spu_nodes = spu_prof_num_nodes * 8;
+	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
+
+	/* create buffer for storing the SPU data to put in
+	 * the kernel buffer.
+	 */
+	ret = oprofile_spu_buff_create();
+	if (ret)
+		goto out;
 
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_PROFILING_CODE);
-	add_event_entry(num_spu_nodes);
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff_add(ESCAPE_CODE, spu);
+		spu_buff_add(SPU_PROFILING_CODE, spu);
+		spu_buff_add(num_spu_nodes, spu);
+	}
 	spin_unlock_irqrestore(&buffer_lock, flags);
 
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff[spu].ctx_sw_seen = 0;
+		spu_buff[spu].last_guard_val = 0;
+	}
+
 	/* Register for SPU events */
 	register_ret = spu_switch_event_register(&spu_active);
 	if (register_ret) {
@@ -393,8 +551,6 @@ int spu_sync_start(void)
 		goto out;
 	}
 
-	for (k = 0; k < (MAX_NUMNODES * 8); k++)
-		last_guard_val[k] = 0;
 	pr_debug("spu_sync_start -- running.\n");
 out:
 	return ret;
@@ -446,13 +602,20 @@ void spu_sync_buffer(int spu_num, unsigned int *samples,
 		 * use.  We need to discard samples taken during the time
 		 * period which an overlay occurs (i.e., guard value changes).
 		 */
-		if (grd_val && grd_val != last_guard_val[spu_num]) {
-			last_guard_val[spu_num] = grd_val;
+		if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
+			spu_buff[spu_num].last_guard_val = grd_val;
 			/* Drop the rest of the samples. */
 			break;
 		}
 
-		add_event_entry(file_offset | spu_num_shifted);
+		/* We must ensure that the SPU context switch has been written
+		 * out before samples for the SPU.  Otherwise, the SPU context
+		 * information is not available and the postprocessing of the
+		 * SPU PC will fail with no available anonymous map information.
+		 */
+		if (spu_buff[spu_num].ctx_sw_seen)
+			spu_buff_add((file_offset | spu_num_shifted),
+				     spu_num);
 	}
 	spin_unlock(&buffer_lock);
 out:
@@ -463,20 +626,41 @@ out:
 int spu_sync_stop(void)
 {
 	unsigned long flags = 0;
-	int ret = spu_switch_event_unregister(&spu_active);
-	if (ret) {
+	int ret;
+	int k;
+
+	ret = spu_switch_event_unregister(&spu_active);
+
+	if (ret)
 		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: spu_switch_event_unregister returned %d\n",
-		       __func__, __LINE__, ret);
-		goto out;
-	}
+		       "%s, line %d: spu_switch_event_unregister " \
+		       "returned %d\n",
+		       __func__, __LINE__, ret);
+
+	/* flush any remaining data in the per SPU buffers */
+	sync_spu_buff();
 
 	spin_lock_irqsave(&cache_lock, flags);
 	ret = release_cached_info(RELEASE_ALL);
 	spin_unlock_irqrestore(&cache_lock, flags);
-out:
+
+	/* remove scheduled work queue item rather than waiting
+	 * for every queued entry to execute.  Then flush pending
+	 * system wide buffer to event buffer.
+	 */
+	cancel_delayed_work(&spu_work);
+
+	for (k = 0; k < num_spu_nodes; k++) {
+		spu_buff[k].ctx_sw_seen = 0;
+
+		/*
+		 * spu_buff[k].buff will be null if there was a problem
+		 * allocating the buffer.  Only delete if it exists.
+		 */
+		kfree(spu_buff[k].buff);
+		spu_buff[k].buff = 0;
+	}
 	pr_debug("spu_sync_stop -- done.\n");
 	return ret;
 }
 
-
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index ed982273fb8..37681700b61 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -628,3 +628,27 @@ void sync_buffer(int cpu)
 
 	mutex_unlock(&buffer_mutex);
 }
+
+/* The function can be used to add a buffer worth of data directly to
+ * the kernel buffer.  The buffer is assumed to be a circular buffer.
+ * Take the entries from index start and end at index end, wrapping
+ * at max_entries.
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+		       unsigned int stop, unsigned int max)
+{
+	int i;
+
+	i = start;
+
+	mutex_lock(&buffer_mutex);
+	while (i != stop) {
+		add_event_entry(buf[i++]);
+
+		if (i >= max)
+			i = 0;
+	}
+
+	mutex_unlock(&buffer_mutex);
+}
+
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index e1bd5a937f6..7ba39fe20a8 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -38,13 +38,26 @@ static int work_enabled;
 void free_cpu_buffers(void)
 {
 	int i;
 
 	for_each_online_cpu(i) {
 		vfree(per_cpu(cpu_buffer, i).buffer);
 		per_cpu(cpu_buffer, i).buffer = NULL;
 	}
 }
 
+unsigned long oprofile_get_cpu_buffer_size(void)
+{
+	return fs_cpu_buffer_size;
+}
+
+void oprofile_cpu_buffer_inc_smpl_lost(void)
+{
+	struct oprofile_cpu_buffer *cpu_buf
+		= &__get_cpu_var(cpu_buffer);
+
+	cpu_buf->sample_lost_overflow++;
+}
+
 int alloc_cpu_buffers(void)
 {
 	int i;
diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h
index 5076ed1ebd8..84bf324c577 100644
--- a/drivers/oprofile/event_buffer.h
+++ b/drivers/oprofile/event_buffer.h
@@ -17,6 +17,13 @@ int alloc_event_buffer(void);
 
 void free_event_buffer(void);
 
+/**
+ * Add data to the event buffer.
+ * The data passed is free-form, but typically consists of
+ * file offsets, dcookies, context information, and ESCAPE codes.
+ */
+void add_event_entry(unsigned long data);
+
 /* wake up the process sleeping on the event file */
 void wake_up_buffer_waiter(void);
 
diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index bcb8f725427..5231861f357 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -86,13 +86,6 @@ int oprofile_arch_init(struct oprofile_operations * ops);
 void oprofile_arch_exit(void);
 
 /**
- * Add data to the event buffer.
- * The data passed is free-form, but typically consists of
- * file offsets, dcookies, context information, and ESCAPE codes.
- */
-void add_event_entry(unsigned long data);
-
-/**
  * Add a sample. This may be called from any context. Pass
  * smp_processor_id() as cpu.
  */
@@ -162,5 +155,14 @@ int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, siz
 
 /** lock for read/write safety */
 extern spinlock_t oprofilefs_lock;
+
+/**
+ * Add the contents of a circular buffer to the event buffer.
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+		       unsigned int stop, unsigned int max);
+
+unsigned long oprofile_get_cpu_buffer_size(void);
+void oprofile_cpu_buffer_inc_smpl_lost(void);
 
 #endif /* OPROFILE_H */