aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorCarl Love <cel@us.ibm.com>2008-12-01 19:18:36 -0500
committerRobert Richter <robert.richter@amd.com>2009-01-08 09:51:55 -0500
commit883823291d22e06736f1056da6d8303291d6bbf9 (patch)
treedfa8a4bba8599b8887b66048532e6360bfc6e870 /arch/powerpc
parent014cef91ecef9d5e85f9c98a2efbf8a8c4710510 (diff)
powerpc/oprofile: IBM CELL: add SPU event profiling support
This patch adds the SPU event based profiling funcitonality for the IBM Cell processor. Previously, the CELL OProfile kernel code supported PPU event, PPU cycle profiling and SPU cycle profiling. The addition of SPU event profiling allows the users to identify where in their SPU code various SPU evnets are occuring. This should help users further identify issues with their code. Note, SPU profiling has some limitations due to HW constraints. Only one event at a time can be used for profiling and SPU event profiling must be time sliced across all of the SPUs in a node. The patch adds a new arch specific file to the OProfile file system. The file has bit 0 set to indicate that the kernel supports SPU event profiling. The user tool must check this file/bit to make sure the kernel supports SPU event profiling before trying to do SPU event profiling. The user tool check is part of the user tool patch for SPU event profiling. Signed-off-by: Carl Love <carll@us.ibm.com> Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/cell-pmu.h2
-rw-r--r--arch/powerpc/include/asm/oprofile_impl.h6
-rw-r--r--arch/powerpc/oprofile/cell/pr_util.h7
-rw-r--r--arch/powerpc/oprofile/cell/spu_profiler.c34
-rw-r--r--arch/powerpc/oprofile/common.c22
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c490
6 files changed, 545 insertions, 16 deletions
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
index 8066eede3a0c..b4b7338ad79e 100644
--- a/arch/powerpc/include/asm/cell-pmu.h
+++ b/arch/powerpc/include/asm/cell-pmu.h
@@ -37,9 +37,11 @@
37#define CBE_PM_STOP_AT_MAX 0x40000000 37#define CBE_PM_STOP_AT_MAX 0x40000000
38#define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3) 38#define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3)
39#define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28) 39#define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28)
40#define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17)
40#define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18) 41#define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18)
41#define CBE_PM_FREEZE_ALL_CTRS 0x00100000 42#define CBE_PM_FREEZE_ALL_CTRS 0x00100000
42#define CBE_PM_ENABLE_EXT_TRACE 0x00008000 43#define CBE_PM_ENABLE_EXT_TRACE 0x00008000
44#define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9)
43 45
44/* Macros for the trace_address register. */ 46/* Macros for the trace_address register. */
45#define CBE_PM_TRACE_BUF_FULL 0x00000800 47#define CBE_PM_TRACE_BUF_FULL 0x00000800
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
index 95035c602ba6..639dc96077ab 100644
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ b/arch/powerpc/include/asm/oprofile_impl.h
@@ -32,6 +32,12 @@ struct op_system_config {
32 unsigned long mmcr0; 32 unsigned long mmcr0;
33 unsigned long mmcr1; 33 unsigned long mmcr1;
34 unsigned long mmcra; 34 unsigned long mmcra;
35#ifdef CONFIG_OPROFILE_CELL
36 /* Register for oprofile user tool to check cell kernel profiling
37 * suport.
38 */
39 unsigned long cell_support;
40#endif
35#endif 41#endif
36 unsigned long enable_kernel; 42 unsigned long enable_kernel;
37 unsigned long enable_user; 43 unsigned long enable_user;
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index bca7207bd92a..a048b0b72be3 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -30,6 +30,10 @@
30extern struct delayed_work spu_work; 30extern struct delayed_work spu_work;
31extern int spu_prof_running; 31extern int spu_prof_running;
32 32
33#define TRACE_ARRAY_SIZE 1024
34
35extern spinlock_t oprof_spu_smpl_arry_lck;
36
33struct spu_overlay_info { /* map of sections within an SPU overlay */ 37struct spu_overlay_info { /* map of sections within an SPU overlay */
34 unsigned int vma; /* SPU virtual memory address from elf */ 38 unsigned int vma; /* SPU virtual memory address from elf */
35 unsigned int size; /* size of section from elf */ 39 unsigned int size; /* size of section from elf */
@@ -90,9 +94,10 @@ void vma_map_free(struct vma_to_fileoffset_map *map);
90 * cycles_reset is the SPU_CYCLES count value specified by the user. 94 * cycles_reset is the SPU_CYCLES count value specified by the user.
91 */ 95 */
92int start_spu_profiling_cycles(unsigned int cycles_reset); 96int start_spu_profiling_cycles(unsigned int cycles_reset);
97void start_spu_profiling_events(void);
93 98
94void stop_spu_profiling_cycles(void); 99void stop_spu_profiling_cycles(void);
95 100void stop_spu_profiling_events(void);
96 101
97/* add the necessary profiling hooks */ 102/* add the necessary profiling hooks */
98int spu_sync_start(void); 103int spu_sync_start(void);
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index 8b1b9ccaff9f..de170b7ae71b 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -18,11 +18,21 @@
18#include <asm/cell-pmu.h> 18#include <asm/cell-pmu.h>
19#include "pr_util.h" 19#include "pr_util.h"
20 20
21#define TRACE_ARRAY_SIZE 1024
22#define SCALE_SHIFT 14 21#define SCALE_SHIFT 14
23 22
24static u32 *samples; 23static u32 *samples;
25 24
25/* spu_prof_running is a flag used to indicate if spu profiling is enabled
26 * or not. It is set by the routines start_spu_profiling_cycles() and
27 * start_spu_profiling_events(). The flag is cleared by the routines
28 * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These
29 * routines are called via global_start() and global_stop() which are called in
30 * op_powerpc_start() and op_powerpc_stop(). These routines are called once
31 * per system as a result of the user starting/stopping oprofile. Hence, only
32 * one CPU per user at a time will be changing the value of spu_prof_running.
33 * In general, OProfile does not protect against multiple users trying to run
34 * OProfile at a time.
35 */
26int spu_prof_running; 36int spu_prof_running;
27static unsigned int profiling_interval; 37static unsigned int profiling_interval;
28 38
@@ -31,7 +41,7 @@ static unsigned int profiling_interval;
31 41
32#define SPU_PC_MASK 0xFFFF 42#define SPU_PC_MASK 0xFFFF
33 43
34static DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); 44DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
35unsigned long oprof_spu_smpl_arry_lck_flags; 45unsigned long oprof_spu_smpl_arry_lck_flags;
36 46
37void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) 47void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
@@ -212,6 +222,21 @@ int start_spu_profiling_cycles(unsigned int cycles_reset)
212 return 0; 222 return 0;
213} 223}
214 224
225/*
226 * Entry point for SPU event profiling.
227 * NOTE: SPU profiling is done system-wide, not per-CPU.
228 *
229 * cycles_reset is the count value specified by the user when
230 * setting up OProfile to count SPU_CYCLES.
231 */
232void start_spu_profiling_events(void)
233{
234 spu_prof_running = 1;
235 schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
236
237 return;
238}
239
215void stop_spu_profiling_cycles(void) 240void stop_spu_profiling_cycles(void)
216{ 241{
217 spu_prof_running = 0; 242 spu_prof_running = 0;
@@ -219,3 +244,8 @@ void stop_spu_profiling_cycles(void)
219 kfree(samples); 244 kfree(samples);
220 pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); 245 pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
221} 246}
247
248void stop_spu_profiling_events(void)
249{
250 spu_prof_running = 0;
251}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 17807acb05d9..21f16edf6c8d 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -132,6 +132,28 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
132 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); 132 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
133 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); 133 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
134 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); 134 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
135#ifdef CONFIG_OPROFILE_CELL
136 /* create a file the user tool can check to see what level of profiling
137 * support exits with this kernel. Initialize bit mask to indicate
138 * what support the kernel has:
139 * bit 0 - Supports SPU event profiling in addition to PPU
140 * event and cycles; and SPU cycle profiling
141 * bits 1-31 - Currently unused.
142 *
143 * If the file does not exist, then the kernel only supports SPU
144 * cycle profiling, PPU event and cycle profiling.
145 */
146 oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support);
147 sys.cell_support = 0x1; /* Note, the user OProfile tool must check
148 * that this bit is set before attempting to
149 * user SPU event profiling. Older kernels
150 * will not have this file, hence the user
151 * tool is not allowed to do SPU event
152 * profiling on older kernels. Older kernels
153 * will accept SPU events but collected data
154 * is garbage.
155 */
156#endif
135#endif 157#endif
136 158
137 for (i = 0; i < model->num_counters; ++i) { 159 for (i = 0; i < model->num_counters; ++i) {
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index ad7f32c848f8..ff96cbfb89bb 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -44,6 +44,12 @@
44#define SPU_PROFILING_CYCLES 1 44#define SPU_PROFILING_CYCLES 1
45#define SPU_PROFILING_EVENTS 2 45#define SPU_PROFILING_EVENTS 2
46 46
47#define SPU_EVENT_NUM_START 4100
48#define SPU_EVENT_NUM_STOP 4399
49#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */
50#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */
51#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */
52
47#define NUM_SPUS_PER_NODE 8 53#define NUM_SPUS_PER_NODE 8
48#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ 54#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
49 55
@@ -61,6 +67,12 @@
61 67
62#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ 68#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
63 69
70/* Minumum HW interval timer setting to send value to trace buffer is 10 cycle.
71 * To configure counter to send value every N cycles set counter to
72 * 2^32 - 1 - N.
73 */
74#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10
75
64/* 76/*
65 * spu_cycle_reset is the number of cycles between samples. 77 * spu_cycle_reset is the number of cycles between samples.
66 * This variable is used for SPU profiling and should ONLY be set 78 * This variable is used for SPU profiling and should ONLY be set
@@ -68,6 +80,7 @@
68 */ 80 */
69static unsigned int spu_cycle_reset; 81static unsigned int spu_cycle_reset;
70static unsigned int profiling_mode; 82static unsigned int profiling_mode;
83static int spu_evnt_phys_spu_indx;
71 84
72struct pmc_cntrl_data { 85struct pmc_cntrl_data {
73 unsigned long vcntr; 86 unsigned long vcntr;
@@ -108,6 +121,8 @@ struct pm_cntrl {
108 u16 trace_mode; 121 u16 trace_mode;
109 u16 freeze; 122 u16 freeze;
110 u16 count_mode; 123 u16 count_mode;
124 u16 spu_addr_trace;
125 u8 trace_buf_ovflw;
111}; 126};
112 127
113static struct { 128static struct {
@@ -125,6 +140,7 @@ static struct {
125#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 140#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
126 141
127static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 142static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
143static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
128static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 144static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
129 145
130/* 146/*
@@ -154,6 +170,7 @@ static u32 hdw_thread;
154 170
155static u32 virt_cntr_inter_mask; 171static u32 virt_cntr_inter_mask;
156static struct timer_list timer_virt_cntr; 172static struct timer_list timer_virt_cntr;
173static struct timer_list timer_spu_event_swap;
157 174
158/* 175/*
159 * pm_signal needs to be global since it is initialized in 176 * pm_signal needs to be global since it is initialized in
@@ -372,9 +389,13 @@ static void write_pm_cntrl(int cpu)
372 if (pm_regs.pm_cntrl.trace_mode != 0) 389 if (pm_regs.pm_cntrl.trace_mode != 0)
373 val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); 390 val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
374 391
392 if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
393 val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);
375 if (pm_regs.pm_cntrl.freeze == 1) 394 if (pm_regs.pm_cntrl.freeze == 1)
376 val |= CBE_PM_FREEZE_ALL_CTRS; 395 val |= CBE_PM_FREEZE_ALL_CTRS;
377 396
397 val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);
398
378 /* 399 /*
379 * Routine set_count_mode must be called previously to set 400 * Routine set_count_mode must be called previously to set
380 * the count mode based on the user selection of user and kernel. 401 * the count mode based on the user selection of user and kernel.
@@ -563,9 +584,184 @@ static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
563 return 0; 584 return 0;
564} 585}
565 586
587/* Unfortunately, the hardware will only support event profiling
588 * on one SPU per node at a time. Therefore, we must time slice
589 * the profiling across all SPUs in the node. Note, we do this
590 * in parallel for each node. The following routine is called
591 * periodically based on kernel timer to switch which SPU is
592 * being monitored in a round robbin fashion.
593 */
594static void spu_evnt_swap(unsigned long data)
595{
596 int node;
597 int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
598 unsigned long flags;
599 int cpu;
600 int ret;
601 u32 interrupt_mask;
602
603
604 /* enable interrupts on cntr 0 */
605 interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);
606
607 hdw_thread = 0;
608
609 /* Make sure spu event interrupt handler and spu event swap
610 * don't access the counters simultaneously.
611 */
612 spin_lock_irqsave(&cntr_lock, flags);
613
614 cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
615
616 if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
617 spu_evnt_phys_spu_indx = 0;
618
619 pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
620 pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
621 pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
622
623 /* switch the SPU being profiled on each node */
624 for_each_online_cpu(cpu) {
625 if (cbe_get_hw_thread_id(cpu))
626 continue;
627
628 node = cbe_cpu_to_node(cpu);
629 cur_phys_spu = (node * NUM_SPUS_PER_NODE)
630 + cur_spu_evnt_phys_spu_indx;
631 nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
632 + spu_evnt_phys_spu_indx;
633
634 /*
635 * stop counters, save counter values, restore counts
636 * for previous physical SPU
637 */
638 cbe_disable_pm(cpu);
639 cbe_disable_pm_interrupts(cpu);
640
641 spu_pm_cnt[cur_phys_spu]
642 = cbe_read_ctr(cpu, 0);
643
644 /* restore previous count for the next spu to sample */
645 /* NOTE, hardware issue, counter will not start if the
646 * counter value is at max (0xFFFFFFFF).
647 */
648 if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
649 cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
650 else
651 cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);
652
653 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
654
655 /* setup the debug bus measure the one event and
656 * the two events to route the next SPU's PC on
657 * the debug bus
658 */
659 ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
660 if (ret)
661 printk(KERN_ERR
662 "%s: pm_rtas_activate_signals failed, SPU event swap\n",
663 __func__);
664
665 /* clear the trace buffer, don't want to take PC for
666 * previous SPU*/
667 cbe_write_pm(cpu, trace_address, 0);
668
669 enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
670
671 /* Enable interrupts on the CPU thread that is starting */
672 cbe_enable_pm_interrupts(cpu, hdw_thread,
673 interrupt_mask);
674 cbe_enable_pm(cpu);
675 }
676
677 spin_unlock_irqrestore(&cntr_lock, flags);
678
679 /* swap approximately every 0.1 seconds */
680 mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
681}
682
683static void start_spu_event_swap(void)
684{
685 init_timer(&timer_spu_event_swap);
686 timer_spu_event_swap.function = spu_evnt_swap;
687 timer_spu_event_swap.data = 0UL;
688 timer_spu_event_swap.expires = jiffies + HZ / 25;
689 add_timer(&timer_spu_event_swap);
690}
691
692static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
693 struct op_system_config *sys, int num_ctrs)
694{
695 int i;
696
697 /* routine is called once for all nodes */
698
699 spu_evnt_phys_spu_indx = 0;
700 /*
701 * For all events except PPU CYCLEs, each node will need to make
702 * the rtas cbe-perftools call to setup and reset the debug bus.
703 * Make the token lookup call once and store it in the global
704 * variable pm_rtas_token.
705 */
706 pm_rtas_token = rtas_token("ibm,cbe-perftools");
707
708 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
709 printk(KERN_ERR
710 "%s: rtas token ibm,cbe-perftools unknown\n",
711 __func__);
712 return -EIO;
713 }
714
715 /* setup the pm_control register settings,
716 * settings will be written per node by the
717 * cell_cpu_setup() function.
718 */
719 pm_regs.pm_cntrl.trace_buf_ovflw = 1;
720
721 /* Use the occurrence trace mode to have SPU PC saved
722 * to the trace buffer. Occurrence data in trace buffer
723 * is not used. Bit 2 must be set to store SPU addresses.
724 */
725 pm_regs.pm_cntrl.trace_mode = 2;
726
727 pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus
728 event 2 & 3 */
729
730 /* setup the debug bus event array with the SPU PC routing events.
731 * Note, pm_signal[0] will be filled in by set_pm_event() call below.
732 */
733 pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
734 pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
735 pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
736 pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
737
738 pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
739 pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
740 pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
741 pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
742
743 /* Set the user selected spu event to profile on,
744 * note, only one SPU profiling event is supported
745 */
746 num_counters = 1; /* Only support one SPU event at a time */
747 set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
748
749 reset_value[0] = 0xFFFFFFFF - ctr[0].count;
750
751 /* global, used by cell_cpu_setup */
752 ctr_enabled |= 1;
753
754 /* Initialize the count for each SPU to the reset value */
755 for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
756 spu_pm_cnt[i] = reset_value[0];
757
758 return 0;
759}
760
566static int cell_reg_setup_ppu(struct op_counter_config *ctr, 761static int cell_reg_setup_ppu(struct op_counter_config *ctr,
567 struct op_system_config *sys, int num_ctrs) 762 struct op_system_config *sys, int num_ctrs)
568{ 763{
764 /* routine is called once for all nodes */
569 int i, j, cpu; 765 int i, j, cpu;
570 766
571 num_counters = num_ctrs; 767 num_counters = num_ctrs;
@@ -577,14 +773,6 @@ static int cell_reg_setup_ppu(struct op_counter_config *ctr,
577 __func__); 773 __func__);
578 return -EIO; 774 return -EIO;
579 } 775 }
580 pm_regs.group_control = 0;
581 pm_regs.debug_bus_control = 0;
582
583 /* setup the pm_control register */
584 memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl));
585 pm_regs.pm_cntrl.stop_at_max = 1;
586 pm_regs.pm_cntrl.trace_mode = 0;
587 pm_regs.pm_cntrl.freeze = 1;
588 776
589 set_count_mode(sys->enable_kernel, sys->enable_user); 777 set_count_mode(sys->enable_kernel, sys->enable_user);
590 778
@@ -657,10 +845,20 @@ static int cell_reg_setup_ppu(struct op_counter_config *ctr,
657static int cell_reg_setup(struct op_counter_config *ctr, 845static int cell_reg_setup(struct op_counter_config *ctr,
658 struct op_system_config *sys, int num_ctrs) 846 struct op_system_config *sys, int num_ctrs)
659{ 847{
660 int ret; 848 int ret=0;
661
662 spu_cycle_reset = 0; 849 spu_cycle_reset = 0;
663 850
851 /* initialize the spu_arr_trace value, will be reset if
852 * doing spu event profiling.
853 */
854 pm_regs.group_control = 0;
855 pm_regs.debug_bus_control = 0;
856 pm_regs.pm_cntrl.stop_at_max = 1;
857 pm_regs.pm_cntrl.trace_mode = 0;
858 pm_regs.pm_cntrl.freeze = 1;
859 pm_regs.pm_cntrl.trace_buf_ovflw = 0;
860 pm_regs.pm_cntrl.spu_addr_trace = 0;
861
664 /* 862 /*
665 * For all events except PPU CYCLEs, each node will need to make 863 * For all events except PPU CYCLEs, each node will need to make
666 * the rtas cbe-perftools call to setup and reset the debug bus. 864 * the rtas cbe-perftools call to setup and reset the debug bus.
@@ -679,6 +877,18 @@ static int cell_reg_setup(struct op_counter_config *ctr,
679 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 877 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
680 profiling_mode = SPU_PROFILING_CYCLES; 878 profiling_mode = SPU_PROFILING_CYCLES;
681 ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); 879 ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
880 } else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
881 (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
882 profiling_mode = SPU_PROFILING_EVENTS;
883 spu_cycle_reset = ctr[0].count;
884
885 /* for SPU event profiling, need to setup the
886 * pm_signal array with the events to route the
887 * SPU PC before making the FW call. Note, only
888 * one SPU event for profiling can be specified
889 * at a time.
890 */
891 cell_reg_setup_spu_events(ctr, sys, num_ctrs);
682 } else { 892 } else {
683 profiling_mode = PPU_PROFILING; 893 profiling_mode = PPU_PROFILING;
684 ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); 894 ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
@@ -695,6 +905,7 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
695 u32 cpu = smp_processor_id(); 905 u32 cpu = smp_processor_id();
696 u32 num_enabled = 0; 906 u32 num_enabled = 0;
697 int i; 907 int i;
908 int ret;
698 909
699 /* Cycle based SPU profiling does not use the performance 910 /* Cycle based SPU profiling does not use the performance
700 * counters. The trace array is configured to collect 911 * counters. The trace array is configured to collect
@@ -729,7 +940,20 @@ static int cell_cpu_setup(struct op_counter_config *cntr)
729 * The pm_rtas_activate_signals will return -EIO if the FW 940 * The pm_rtas_activate_signals will return -EIO if the FW
730 * call failed. 941 * call failed.
731 */ 942 */
732 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 943 if (profiling_mode == SPU_PROFILING_EVENTS) {
944 /* For SPU event profiling also need to setup the
945 * pm interval timer
946 */
947 ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
948 num_enabled+2);
949 /* store PC from debug bus to Trace buffer as often
950 * as possible (every 10 cycles)
951 */
952 cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
953 return ret;
954 } else
955 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
956 num_enabled);
733} 957}
734 958
735#define ENTRIES 303 959#define ENTRIES 303
@@ -926,6 +1150,7 @@ static void cell_global_stop_spu_cycles(void)
926 int cpu; 1150 int cpu;
927 1151
928 oprofile_running = 0; 1152 oprofile_running = 0;
1153 smp_wmb();
929 1154
930#ifdef CONFIG_CPU_FREQ 1155#ifdef CONFIG_CPU_FREQ
931 cpufreq_unregister_notifier(&cpu_freq_notifier_block, 1156 cpufreq_unregister_notifier(&cpu_freq_notifier_block,
@@ -957,8 +1182,33 @@ static void cell_global_stop_spu_cycles(void)
957 pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 1182 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
958 } 1183 }
959 1184
960 if (profiling_mode == SPU_PROFILING_CYCLES) 1185 stop_spu_profiling_cycles();
961 stop_spu_profiling_cycles(); 1186}
1187
1188static void cell_global_stop_spu_events(void)
1189{
1190 int cpu;
1191 oprofile_running = 0;
1192
1193 stop_spu_profiling_events();
1194 smp_wmb();
1195
1196 for_each_online_cpu(cpu) {
1197 if (cbe_get_hw_thread_id(cpu))
1198 continue;
1199
1200 cbe_sync_irq(cbe_cpu_to_node(cpu));
1201 /* Stop the counters */
1202 cbe_disable_pm(cpu);
1203 cbe_write_pm07_control(cpu, 0, 0);
1204
1205 /* Deactivate the signals */
1206 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1207
1208 /* Deactivate interrupts */
1209 cbe_disable_pm_interrupts(cpu);
1210 }
1211 del_timer_sync(&timer_spu_event_swap);
962} 1212}
963 1213
964static void cell_global_stop_ppu(void) 1214static void cell_global_stop_ppu(void)
@@ -994,6 +1244,8 @@ static void cell_global_stop(void)
994{ 1244{
995 if (profiling_mode == PPU_PROFILING) 1245 if (profiling_mode == PPU_PROFILING)
996 cell_global_stop_ppu(); 1246 cell_global_stop_ppu();
1247 else if (profiling_mode == SPU_PROFILING_EVENTS)
1248 cell_global_stop_spu_events();
997 else 1249 else
998 cell_global_stop_spu_cycles(); 1250 cell_global_stop_spu_cycles();
999} 1251}
@@ -1088,6 +1340,69 @@ out:
1088 return rtas_error; 1340 return rtas_error;
1089} 1341}
1090 1342
1343static int cell_global_start_spu_events(struct op_counter_config *ctr)
1344{
1345 int cpu;
1346 u32 interrupt_mask = 0;
1347 int rtn = 0;
1348
1349 hdw_thread = 0;
1350
1351 /* spu event profiling, uses the performance counters to generate
1352 * an interrupt. The hardware is setup to store the SPU program
1353 * counter into the trace array. The occurrence mode is used to
1354 * enable storing data to the trace buffer. The bits are set
1355 * to send/store the SPU address in the trace buffer. The debug
1356 * bus must be setup to route the SPU program counter onto the
1357 * debug bus. The occurrence data in the trace buffer is not used.
1358 */
1359
1360 /* This routine gets called once for the system.
1361 * There is one performance monitor per node, so we
1362 * only need to perform this function once per node.
1363 */
1364
1365 for_each_online_cpu(cpu) {
1366 if (cbe_get_hw_thread_id(cpu))
1367 continue;
1368
1369 /*
1370 * Setup SPU event-based profiling.
1371 * Set perf_mon_control bit 0 to a zero before
1372 * enabling spu collection hardware.
1373 *
1374 * Only support one SPU event on one SPU per node.
1375 */
1376 if (ctr_enabled & 1) {
1377 cbe_write_ctr(cpu, 0, reset_value[0]);
1378 enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
1379 interrupt_mask |=
1380 CBE_PM_CTR_OVERFLOW_INTR(0);
1381 } else {
1382 /* Disable counter */
1383 cbe_write_pm07_control(cpu, 0, 0);
1384 }
1385
1386 cbe_get_and_clear_pm_interrupts(cpu);
1387 cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
1388 cbe_enable_pm(cpu);
1389
1390 /* clear the trace buffer */
1391 cbe_write_pm(cpu, trace_address, 0);
1392 }
1393
1394 /* Start the timer to time slice collecting the event profile
1395 * on each of the SPUs. Note, can collect profile on one SPU
1396 * per node at a time.
1397 */
1398 start_spu_event_swap();
1399 start_spu_profiling_events();
1400 oprofile_running = 1;
1401 smp_wmb();
1402
1403 return rtn;
1404}
1405
1091static int cell_global_start_ppu(struct op_counter_config *ctr) 1406static int cell_global_start_ppu(struct op_counter_config *ctr)
1092{ 1407{
1093 u32 cpu, i; 1408 u32 cpu, i;
@@ -1139,11 +1454,158 @@ static int cell_global_start(struct op_counter_config *ctr)
1139{ 1454{
1140 if (profiling_mode == SPU_PROFILING_CYCLES) 1455 if (profiling_mode == SPU_PROFILING_CYCLES)
1141 return cell_global_start_spu_cycles(ctr); 1456 return cell_global_start_spu_cycles(ctr);
1457 else if (profiling_mode == SPU_PROFILING_EVENTS)
1458 return cell_global_start_spu_events(ctr);
1142 else 1459 else
1143 return cell_global_start_ppu(ctr); 1460 return cell_global_start_ppu(ctr);
1144} 1461}
1145 1462
1146 1463
1464/* The SPU interrupt handler
1465 *
1466 * SPU event profiling works as follows:
1467 * The pm_signal[0] holds the one SPU event to be measured. It is routed on
1468 * the debug bus using word 0 or 1. The value of pm_signal[1] and
1469 * pm_signal[2] contain the necessary events to route the SPU program
1470 * counter for the selected SPU onto the debug bus using words 2 and 3.
1471 * The pm_interval register is setup to write the SPU PC value into the
1472 * trace buffer at the maximum rate possible. The trace buffer is configured
1473 * to store the PCs, wrapping when it is full. The performance counter is
1474 * intialized to the max hardware count minus the number of events, N, between
1475 * samples. Once the N events have occured, a HW counter overflow occurs
1476 * causing the generation of a HW counter interrupt which also stops the
1477 * writing of the SPU PC values to the trace buffer. Hence the last PC
1478 * written to the trace buffer is the SPU PC that we want. Unfortunately,
1479 * we have to read from the beginning of the trace buffer to get to the
1480 * last value written. We just hope the PPU has nothing better to do then
1481 * service this interrupt. The PC for the specific SPU being profiled is
1482 * extracted from the trace buffer processed and stored. The trace buffer
1483 * is cleared, interrupts are cleared, the counter is reset to max - N.
1484 * A kernel timer is used to periodically call the routine spu_evnt_swap()
1485 * to switch to the next physical SPU in the node to profile in round robbin
1486 * order. This way data is collected for all SPUs on the node. It does mean
1487 * that we need to use a relatively small value of N to ensure enough samples
1488 * on each SPU are collected each SPU is being profiled 1/8 of the time.
1489 * It may also be necessary to use a longer sample collection period.
1490 */
1491static void cell_handle_interrupt_spu(struct pt_regs *regs,
1492 struct op_counter_config *ctr)
1493{
1494 u32 cpu, cpu_tmp;
1495 u64 trace_entry;
1496 u32 interrupt_mask;
1497 u64 trace_buffer[2];
1498 u64 last_trace_buffer;
1499 u32 sample;
1500 u32 trace_addr;
1501 unsigned long sample_array_lock_flags;
1502 int spu_num;
1503 unsigned long flags;
1504
1505 /* Make sure spu event interrupt handler and spu event swap
1506 * don't access the counters simultaneously.
1507 */
1508 cpu = smp_processor_id();
1509 spin_lock_irqsave(&cntr_lock, flags);
1510
1511 cpu_tmp = cpu;
1512 cbe_disable_pm(cpu);
1513
1514 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
1515
1516 sample = 0xABCDEF;
1517 trace_entry = 0xfedcba;
1518 last_trace_buffer = 0xdeadbeaf;
1519
1520 if ((oprofile_running == 1) && (interrupt_mask != 0)) {
1521 /* disable writes to trace buff */
1522 cbe_write_pm(cpu, pm_interval, 0);
1523
1524 /* only have one perf cntr being used, cntr 0 */
1525 if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
1526 && ctr[0].enabled)
1527 /* The SPU PC values will be read
1528 * from the trace buffer, reset counter
1529 */
1530
1531 cbe_write_ctr(cpu, 0, reset_value[0]);
1532
1533 trace_addr = cbe_read_pm(cpu, trace_address);
1534
1535 while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
1536 /* There is data in the trace buffer to process
1537 * Read the buffer until you get to the last
1538 * entry. This is the value we want.
1539 */
1540
1541 cbe_read_trace_buffer(cpu, trace_buffer);
1542 trace_addr = cbe_read_pm(cpu, trace_address);
1543 }
1544
1545 /* SPU Address 16 bit count format for 128 bit
1546 * HW trace buffer is used for the SPU PC storage
1547 * HDR bits 0:15
1548 * SPU Addr 0 bits 16:31
1549 * SPU Addr 1 bits 32:47
1550 * unused bits 48:127
1551 *
1552 * HDR: bit4 = 1 SPU Address 0 valid
1553 * HDR: bit5 = 1 SPU Address 1 valid
1554 * - unfortunately, the valid bits don't seem to work
1555 *
1556 * Note trace_buffer[0] holds bits 0:63 of the HW
1557 * trace buffer, trace_buffer[1] holds bits 64:127
1558 */
1559
1560 trace_entry = trace_buffer[0]
1561 & 0x00000000FFFF0000;
1562
1563 /* only top 16 of the 18 bit SPU PC address
1564 * is stored in trace buffer, hence shift right
1565 * by 16 -2 bits */
1566 sample = trace_entry >> 14;
1567 last_trace_buffer = trace_buffer[0];
1568
1569 spu_num = spu_evnt_phys_spu_indx
1570 + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);
1571
1572 /* make sure only one process at a time is calling
1573 * spu_sync_buffer()
1574 */
1575 spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
1576 sample_array_lock_flags);
1577 spu_sync_buffer(spu_num, &sample, 1);
1578 spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
1579 sample_array_lock_flags);
1580
1581 smp_wmb(); /* insure spu event buffer updates are written
1582 * don't want events intermingled... */
1583
1584 /* The counters were frozen by the interrupt.
1585 * Reenable the interrupt and restart the counters.
1586 */
1587 cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
1588 cbe_enable_pm_interrupts(cpu, hdw_thread,
1589 virt_cntr_inter_mask);
1590
1591 /* clear the trace buffer, re-enable writes to trace buff */
1592 cbe_write_pm(cpu, trace_address, 0);
1593 cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
1594
1595 /* The writes to the various performance counters only writes
1596 * to a latch. The new values (interrupt setting bits, reset
1597 * counter value etc.) are not copied to the actual registers
1598 * until the performance monitor is enabled. In order to get
1599 * this to work as desired, the permormance monitor needs to
1600 * be disabled while writing to the latches. This is a
1601 * HW design issue.
1602 */
1603 write_pm_cntrl(cpu);
1604 cbe_enable_pm(cpu);
1605 }
1606 spin_unlock_irqrestore(&cntr_lock, flags);
1607}
1608
1147static void cell_handle_interrupt_ppu(struct pt_regs *regs, 1609static void cell_handle_interrupt_ppu(struct pt_regs *regs,
1148 struct op_counter_config *ctr) 1610 struct op_counter_config *ctr)
1149{ 1611{
@@ -1222,6 +1684,8 @@ static void cell_handle_interrupt(struct pt_regs *regs,
1222{ 1684{
1223 if (profiling_mode == PPU_PROFILING) 1685 if (profiling_mode == PPU_PROFILING)
1224 cell_handle_interrupt_ppu(regs, ctr); 1686 cell_handle_interrupt_ppu(regs, ctr);
1687 else
1688 cell_handle_interrupt_spu(regs, ctr);
1225} 1689}
1226 1690
1227/* 1691/*