aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorBob Nelson <rrnelson@linux.vnet.ibm.com>2007-07-20 15:39:53 -0400
committerArnd Bergmann <arnd@klappe.arndb.de>2007-07-20 15:42:24 -0400
commit1474855d0878cced6f39f51f3c2bd7428b44cb1e (patch)
treecbad42404bfc0f7222d0a88e4ed9b0e9e0d0cb50 /arch/powerpc
parent36aaccc1e96481e8310b1d13600096da0f24ff43 (diff)
[CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
From: Maynard Johnson <mpjohn@us.ibm.com> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Exports spu_set_profile_private_kref and spu_get_profile_private_kref which are used by OProfile to store private profile information in spufs data structures. Also incorporated several fixes from other patches (rrn). Check pointer returned from kzalloc. Eliminated unnecessary cast. Better error handling and cleanup in the related area. 64-bit unsigned long parameter was being demoted to 32-bit unsigned int and eventually promoted back to unsigned long. Signed-off-by: Carl Love <carll@us.ibm.com> Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com> Signed-off-by: Bob Nelson <rrnelson@us.ibm.com> Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com> Acked-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/configs/cell_defconfig3
-rw-r--r--arch/powerpc/kernel/time.c1
-rw-r--r--arch/powerpc/oprofile/Kconfig7
-rw-r--r--arch/powerpc/oprofile/Makefile4
-rw-r--r--arch/powerpc/oprofile/cell/pr_util.h97
-rw-r--r--arch/powerpc/oprofile/cell/spu_profiler.c221
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c484
-rw-r--r--arch/powerpc/oprofile/cell/vma_map.c287
-rw-r--r--arch/powerpc/oprofile/common.c51
-rw-r--r--arch/powerpc/oprofile/op_model_7450.c14
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c607
-rw-r--r--arch/powerpc/oprofile/op_model_fsl_booke.c11
-rw-r--r--arch/powerpc/oprofile/op_model_pa6t.c12
-rw-r--r--arch/powerpc/oprofile/op_model_power4.c11
-rw-r--r--arch/powerpc/oprofile/op_model_rs64.c10
-rw-r--r--arch/powerpc/platforms/cell/spufs/context.c20
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h2
18 files changed, 1738 insertions, 108 deletions
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 74f83f4a4e5e..d9ac24e8de16 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
1455# Instrumentation Support 1455# Instrumentation Support
1456# 1456#
1457CONFIG_PROFILING=y 1457CONFIG_PROFILING=y
1458CONFIG_OPROFILE=y 1458CONFIG_OPROFILE=m
1459CONFIG_OPROFILE_CELL=y
1459# CONFIG_KPROBES is not set 1460# CONFIG_KPROBES is not set
1460 1461
1461# 1462#
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e5df167f7824..727a6699f2f4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
122static long timezone_offset; 122static long timezone_offset;
123 123
124unsigned long ppc_proc_freq; 124unsigned long ppc_proc_freq;
125EXPORT_SYMBOL(ppc_proc_freq);
125unsigned long ppc_tb_freq; 126unsigned long ppc_tb_freq;
126 127
127static u64 tb_last_jiffy __cacheline_aligned_in_smp; 128static u64 tb_last_jiffy __cacheline_aligned_in_smp;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
index eb2dece76a54..7089e79689b9 100644
--- a/arch/powerpc/oprofile/Kconfig
+++ b/arch/powerpc/oprofile/Kconfig
@@ -15,3 +15,10 @@ config OPROFILE
15 15
16 If unsure, say N. 16 If unsure, say N.
17 17
18config OPROFILE_CELL
19 bool "OProfile for Cell Broadband Engine"
20 depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
21 default y
22 help
23 Profiling of Cell BE SPUs requires special support enabled
24 by this option.
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 4b5f9528218c..c5f64c3bd668 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
11 timer_int.o ) 11 timer_int.o )
12 12
13oprofile-y := $(DRIVER_OBJS) common.o backtrace.o 13oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
14oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o 14oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
15 cell/spu_profiler.o cell/vma_map.o \
16 cell/spu_task_sync.o
15oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o 17oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
16oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o 18oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
17oprofile-$(CONFIG_6xx) += op_model_7450.o 19oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
new file mode 100644
index 000000000000..e5704f00c8b4
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -0,0 +1,97 @@
1 /*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Author: Maynard Johnson <maynardj@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef PR_UTIL_H
15#define PR_UTIL_H
16
17#include <linux/cpumask.h>
18#include <linux/oprofile.h>
19#include <asm/cell-pmu.h>
20#include <asm/spu.h>
21
22#include "../../platforms/cell/cbe_regs.h"
23
24/* Defines used for sync_start */
25#define SKIP_GENERIC_SYNC 0
26#define SYNC_START_ERROR -1
27#define DO_GENERIC_SYNC 1
28
29struct spu_overlay_info { /* map of sections within an SPU overlay */
30 unsigned int vma; /* SPU virtual memory address from elf */
31 unsigned int size; /* size of section from elf */
32 unsigned int offset; /* offset of section into elf file */
33 unsigned int buf;
34};
35
36struct vma_to_fileoffset_map { /* map of sections within an SPU program */
37 struct vma_to_fileoffset_map *next; /* list pointer */
38 unsigned int vma; /* SPU virtual memory address from elf */
39 unsigned int size; /* size of section from elf */
40 unsigned int offset; /* offset of section into elf file */
41 unsigned int guard_ptr;
42 unsigned int guard_val;
43 /*
44 * The guard pointer is an entry in the _ovly_buf_table,
45 * computed using ovly.buf as the index into the table. Since
46 * ovly.buf values begin at '1' to reference the first (or 0th)
47 * entry in the _ovly_buf_table, the computation subtracts 1
48 * from ovly.buf.
49 * The guard value is stored in the _ovly_buf_table entry and
50 * is an index (starting at 1) back to the _ovly_table entry
51 * that is pointing at this _ovly_buf_table entry. So, for
52 * example, for an overlay scenario with one overlay segment
53 * and two overlay sections:
54 * - Section 1 points to the first entry of the
55 * _ovly_buf_table, which contains a guard value
56 * of '1', referencing the first (index=0) entry of
57 * _ovly_table.
58 * - Section 2 points to the second entry of the
59 * _ovly_buf_table, which contains a guard value
60 * of '2', referencing the second (index=1) entry of
61 * _ovly_table.
62 */
63
64};
65
66/* The three functions below are for maintaining and accessing
67 * the vma-to-fileoffset map.
68 */
69struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
70 u64 objectid);
71unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
72 unsigned int vma, const struct spu *aSpu,
73 int *grd_val);
74void vma_map_free(struct vma_to_fileoffset_map *map);
75
76/*
77 * Entry point for SPU profiling.
78 * cycles_reset is the SPU_CYCLES count value specified by the user.
79 */
80int start_spu_profiling(unsigned int cycles_reset);
81
82void stop_spu_profiling(void);
83
84
85/* add the necessary profiling hooks */
86int spu_sync_start(void);
87
88/* remove the hooks */
89int spu_sync_stop(void);
90
91/* Record SPU program counter samples to the oprofile event buffer. */
92void spu_sync_buffer(int spu_num, unsigned int *samples,
93 int num_samples);
94
95void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
96
97#endif /* PR_UTIL_H */
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
new file mode 100644
index 000000000000..380d7e217531
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -0,0 +1,221 @@
1/*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Authors: Maynard Johnson <maynardj@us.ibm.com>
7 * Carl Love <carll@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/hrtimer.h>
16#include <linux/smp.h>
17#include <linux/slab.h>
18#include <asm/cell-pmu.h>
19#include "pr_util.h"
20
21#define TRACE_ARRAY_SIZE 1024
22#define SCALE_SHIFT 14
23
24static u32 *samples;
25
26static int spu_prof_running;
27static unsigned int profiling_interval;
28
29#define NUM_SPU_BITS_TRBUF 16
30#define SPUS_PER_TB_ENTRY 4
31#define SPUS_PER_NODE 8
32
33#define SPU_PC_MASK 0xFFFF
34
35static DEFINE_SPINLOCK(sample_array_lock);
36unsigned long sample_array_lock_flags;
37
38void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
39{
40 unsigned long ns_per_cyc;
41
42 if (!freq_khz)
43 freq_khz = ppc_proc_freq/1000;
44
45 /* To calculate a timeout in nanoseconds, the basic
46 * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
47 * To avoid floating point math, we use the scale math
48 * technique as described in linux/jiffies.h. We use
49 * a scale factor of SCALE_SHIFT, which provides 4 decimal places
50 * of precision. This is close enough for the purpose at hand.
51 *
52 * The value of the timeout should be small enough that the hw
53 * trace buffer will not get more then about 1/3 full for the
54 * maximum user specified (the LFSR value) hw sampling frequency.
55 * This is to ensure the trace buffer will never fill even if the
56 * kernel thread scheduling varies under a heavy system load.
57 */
58
59 ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
60 profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
61
62}
63
64/*
65 * Extract SPU PC from trace buffer entry
66 */
67static void spu_pc_extract(int cpu, int entry)
68{
69 /* the trace buffer is 128 bits */
70 u64 trace_buffer[2];
71 u64 spu_mask;
72 int spu;
73
74 spu_mask = SPU_PC_MASK;
75
76 /* Each SPU PC is 16 bits; hence, four spus in each of
77 * the two 64-bit buffer entries that make up the
78 * 128-bit trace_buffer entry. Process two 64-bit values
79 * simultaneously.
80 * trace[0] SPU PC contents are: 0 1 2 3
81 * trace[1] SPU PC contents are: 4 5 6 7
82 */
83
84 cbe_read_trace_buffer(cpu, trace_buffer);
85
86 for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
87 /* spu PC trace entry is upper 16 bits of the
88 * 18 bit SPU program counter
89 */
90 samples[spu * TRACE_ARRAY_SIZE + entry]
91 = (spu_mask & trace_buffer[0]) << 2;
92 samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
93 = (spu_mask & trace_buffer[1]) << 2;
94
95 trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
96 trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
97 }
98}
99
100static int cell_spu_pc_collection(int cpu)
101{
102 u32 trace_addr;
103 int entry;
104
105 /* process the collected SPU PC for the node */
106
107 entry = 0;
108
109 trace_addr = cbe_read_pm(cpu, trace_address);
110 while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
111 /* there is data in the trace buffer to process */
112 spu_pc_extract(cpu, entry);
113
114 entry++;
115
116 if (entry >= TRACE_ARRAY_SIZE)
117 /* spu_samples is full */
118 break;
119
120 trace_addr = cbe_read_pm(cpu, trace_address);
121 }
122
123 return entry;
124}
125
126
127static enum hrtimer_restart profile_spus(struct hrtimer *timer)
128{
129 ktime_t kt;
130 int cpu, node, k, num_samples, spu_num;
131
132 if (!spu_prof_running)
133 goto stop;
134
135 for_each_online_cpu(cpu) {
136 if (cbe_get_hw_thread_id(cpu))
137 continue;
138
139 node = cbe_cpu_to_node(cpu);
140
141 /* There should only be one kernel thread at a time processing
142 * the samples. In the very unlikely case that the processing
143 * is taking a very long time and multiple kernel threads are
144 * started to process the samples. Make sure only one kernel
145 * thread is working on the samples array at a time. The
146 * sample array must be loaded and then processed for a given
147 * cpu. The sample array is not per cpu.
148 */
149 spin_lock_irqsave(&sample_array_lock,
150 sample_array_lock_flags);
151 num_samples = cell_spu_pc_collection(cpu);
152
153 if (num_samples == 0) {
154 spin_unlock_irqrestore(&sample_array_lock,
155 sample_array_lock_flags);
156 continue;
157 }
158
159 for (k = 0; k < SPUS_PER_NODE; k++) {
160 spu_num = k + (node * SPUS_PER_NODE);
161 spu_sync_buffer(spu_num,
162 samples + (k * TRACE_ARRAY_SIZE),
163 num_samples);
164 }
165
166 spin_unlock_irqrestore(&sample_array_lock,
167 sample_array_lock_flags);
168
169 }
170 smp_wmb(); /* insure spu event buffer updates are written */
171 /* don't want events intermingled... */
172
173 kt = ktime_set(0, profiling_interval);
174 if (!spu_prof_running)
175 goto stop;
176 hrtimer_forward(timer, timer->base->get_time(), kt);
177 return HRTIMER_RESTART;
178
179 stop:
180 printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
181 return HRTIMER_NORESTART;
182}
183
184static struct hrtimer timer;
185/*
186 * Entry point for SPU profiling.
187 * NOTE: SPU profiling is done system-wide, not per-CPU.
188 *
189 * cycles_reset is the count value specified by the user when
190 * setting up OProfile to count SPU_CYCLES.
191 */
192int start_spu_profiling(unsigned int cycles_reset)
193{
194 ktime_t kt;
195
196 pr_debug("timer resolution: %lu\n", TICK_NSEC);
197 kt = ktime_set(0, profiling_interval);
198 hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
199 timer.expires = kt;
200 timer.function = profile_spus;
201
202 /* Allocate arrays for collecting SPU PC samples */
203 samples = kzalloc(SPUS_PER_NODE *
204 TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
205
206 if (!samples)
207 return -ENOMEM;
208
209 spu_prof_running = 1;
210 hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
211
212 return 0;
213}
214
215void stop_spu_profiling(void)
216{
217 spu_prof_running = 0;
218 hrtimer_cancel(&timer);
219 kfree(samples);
220 pr_debug("SPU_PROF: stop_spu_profiling issued\n");
221}
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
new file mode 100644
index 000000000000..133665754a75
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -0,0 +1,484 @@
1/*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Author: Maynard Johnson <maynardj@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* The purpose of this file is to handle SPU event task switching
15 * and to record SPU context information into the OProfile
16 * event buffer.
17 *
18 * Additionally, the spu_sync_buffer function is provided as a helper
19 * for recoding actual SPU program counter samples to the event buffer.
20 */
21#include <linux/dcookies.h>
22#include <linux/kref.h>
23#include <linux/mm.h>
24#include <linux/module.h>
25#include <linux/notifier.h>
26#include <linux/numa.h>
27#include <linux/oprofile.h>
28#include <linux/spinlock.h>
29#include "pr_util.h"
30
31#define RELEASE_ALL 9999
32
33static DEFINE_SPINLOCK(buffer_lock);
34static DEFINE_SPINLOCK(cache_lock);
35static int num_spu_nodes;
36int spu_prof_num_nodes;
37int last_guard_val[MAX_NUMNODES * 8];
38
39/* Container for caching information about an active SPU task. */
40struct cached_info {
41 struct vma_to_fileoffset_map *map;
42 struct spu *the_spu; /* needed to access pointer to local_store */
43 struct kref cache_ref;
44};
45
46static struct cached_info *spu_info[MAX_NUMNODES * 8];
47
48static void destroy_cached_info(struct kref *kref)
49{
50 struct cached_info *info;
51
52 info = container_of(kref, struct cached_info, cache_ref);
53 vma_map_free(info->map);
54 kfree(info);
55 module_put(THIS_MODULE);
56}
57
58/* Return the cached_info for the passed SPU number.
59 * ATTENTION: Callers are responsible for obtaining the
60 * cache_lock if needed prior to invoking this function.
61 */
62static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
63{
64 struct kref *ref;
65 struct cached_info *ret_info;
66
67 if (spu_num >= num_spu_nodes) {
68 printk(KERN_ERR "SPU_PROF: "
69 "%s, line %d: Invalid index %d into spu info cache\n",
70 __FUNCTION__, __LINE__, spu_num);
71 ret_info = NULL;
72 goto out;
73 }
74 if (!spu_info[spu_num] && the_spu) {
75 ref = spu_get_profile_private_kref(the_spu->ctx);
76 if (ref) {
77 spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
78 kref_get(&spu_info[spu_num]->cache_ref);
79 }
80 }
81
82 ret_info = spu_info[spu_num];
83 out:
84 return ret_info;
85}
86
87
88/* Looks for cached info for the passed spu. If not found, the
89 * cached info is created for the passed spu.
90 * Returns 0 for success; otherwise, -1 for error.
91 */
92static int
93prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
94{
95 unsigned long flags;
96 struct vma_to_fileoffset_map *new_map;
97 int retval = 0;
98 struct cached_info *info;
99
100 /* We won't bother getting cache_lock here since
101 * don't do anything with the cached_info that's returned.
102 */
103 info = get_cached_info(spu, spu->number);
104
105 if (info) {
106 pr_debug("Found cached SPU info.\n");
107 goto out;
108 }
109
110 /* Create cached_info and set spu_info[spu->number] to point to it.
111 * spu->number is a system-wide value, not a per-node value.
112 */
113 info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
114 if (!info) {
115 printk(KERN_ERR "SPU_PROF: "
116 "%s, line %d: create vma_map failed\n",
117 __FUNCTION__, __LINE__);
118 retval = -ENOMEM;
119 goto err_alloc;
120 }
121 new_map = create_vma_map(spu, objectId);
122 if (!new_map) {
123 printk(KERN_ERR "SPU_PROF: "
124 "%s, line %d: create vma_map failed\n",
125 __FUNCTION__, __LINE__);
126 retval = -ENOMEM;
127 goto err_alloc;
128 }
129
130 pr_debug("Created vma_map\n");
131 info->map = new_map;
132 info->the_spu = spu;
133 kref_init(&info->cache_ref);
134 spin_lock_irqsave(&cache_lock, flags);
135 spu_info[spu->number] = info;
136 /* Increment count before passing off ref to SPUFS. */
137 kref_get(&info->cache_ref);
138
139 /* We increment the module refcount here since SPUFS is
140 * responsible for the final destruction of the cached_info,
141 * and it must be able to access the destroy_cached_info()
142 * function defined in the OProfile module. We decrement
143 * the module refcount in destroy_cached_info.
144 */
145 try_module_get(THIS_MODULE);
146 spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
147 destroy_cached_info);
148 spin_unlock_irqrestore(&cache_lock, flags);
149 goto out;
150
151err_alloc:
152 kfree(info);
153out:
154 return retval;
155}
156
157/*
158 * NOTE: The caller is responsible for locking the
159 * cache_lock prior to calling this function.
160 */
161static int release_cached_info(int spu_index)
162{
163 int index, end;
164
165 if (spu_index == RELEASE_ALL) {
166 end = num_spu_nodes;
167 index = 0;
168 } else {
169 if (spu_index >= num_spu_nodes) {
170 printk(KERN_ERR "SPU_PROF: "
171 "%s, line %d: "
172 "Invalid index %d into spu info cache\n",
173 __FUNCTION__, __LINE__, spu_index);
174 goto out;
175 }
176 end = spu_index + 1;
177 index = spu_index;
178 }
179 for (; index < end; index++) {
180 if (spu_info[index]) {
181 kref_put(&spu_info[index]->cache_ref,
182 destroy_cached_info);
183 spu_info[index] = NULL;
184 }
185 }
186
187out:
188 return 0;
189}
190
191/* The source code for fast_get_dcookie was "borrowed"
192 * from drivers/oprofile/buffer_sync.c.
193 */
194
195/* Optimisation. We can manage without taking the dcookie sem
196 * because we cannot reach this code without at least one
197 * dcookie user still being registered (namely, the reader
198 * of the event buffer).
199 */
200static inline unsigned long fast_get_dcookie(struct dentry *dentry,
201 struct vfsmount *vfsmnt)
202{
203 unsigned long cookie;
204
205 if (dentry->d_cookie)
206 return (unsigned long)dentry;
207 get_dcookie(dentry, vfsmnt, &cookie);
208 return cookie;
209}
210
211/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
212 * which corresponds loosely to "application name". Also, determine
213 * the offset for the SPU ELF object. If computed offset is
214 * non-zero, it implies an embedded SPU object; otherwise, it's a
215 * separate SPU binary, in which case we retrieve it's dcookie.
216 * For the embedded case, we must determine if SPU ELF is embedded
217 * in the executable application or another file (i.e., shared lib).
218 * If embedded in a shared lib, we must get the dcookie and return
219 * that to the caller.
220 */
221static unsigned long
222get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
223 unsigned long *spu_bin_dcookie,
224 unsigned long spu_ref)
225{
226 unsigned long app_cookie = 0;
227 unsigned int my_offset = 0;
228 struct file *app = NULL;
229 struct vm_area_struct *vma;
230 struct mm_struct *mm = spu->mm;
231
232 if (!mm)
233 goto out;
234
235 down_read(&mm->mmap_sem);
236
237 for (vma = mm->mmap; vma; vma = vma->vm_next) {
238 if (!vma->vm_file)
239 continue;
240 if (!(vma->vm_flags & VM_EXECUTABLE))
241 continue;
242 app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
243 vma->vm_file->f_vfsmnt);
244 pr_debug("got dcookie for %s\n",
245 vma->vm_file->f_dentry->d_name.name);
246 app = vma->vm_file;
247 break;
248 }
249
250 for (vma = mm->mmap; vma; vma = vma->vm_next) {
251 if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
252 continue;
253 my_offset = spu_ref - vma->vm_start;
254 if (!vma->vm_file)
255 goto fail_no_image_cookie;
256
257 pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
258 my_offset, spu_ref,
259 vma->vm_file->f_dentry->d_name.name);
260 *offsetp = my_offset;
261 break;
262 }
263
264 *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
265 vma->vm_file->f_vfsmnt);
266 pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
267
268 up_read(&mm->mmap_sem);
269
270out:
271 return app_cookie;
272
273fail_no_image_cookie:
274 up_read(&mm->mmap_sem);
275
276 printk(KERN_ERR "SPU_PROF: "
277 "%s, line %d: Cannot find dcookie for SPU binary\n",
278 __FUNCTION__, __LINE__);
279 goto out;
280}
281
282
283
284/* This function finds or creates cached context information for the
285 * passed SPU and records SPU context information into the OProfile
286 * event buffer.
287 */
288static int process_context_switch(struct spu *spu, unsigned long objectId)
289{
290 unsigned long flags;
291 int retval;
292 unsigned int offset = 0;
293 unsigned long spu_cookie = 0, app_dcookie;
294
295 retval = prepare_cached_spu_info(spu, objectId);
296 if (retval)
297 goto out;
298
299 /* Get dcookie first because a mutex_lock is taken in that
300 * code path, so interrupts must not be disabled.
301 */
302 app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
303 if (!app_dcookie || !spu_cookie) {
304 retval = -ENOENT;
305 goto out;
306 }
307
308 /* Record context info in event buffer */
309 spin_lock_irqsave(&buffer_lock, flags);
310 add_event_entry(ESCAPE_CODE);
311 add_event_entry(SPU_CTX_SWITCH_CODE);
312 add_event_entry(spu->number);
313 add_event_entry(spu->pid);
314 add_event_entry(spu->tgid);
315 add_event_entry(app_dcookie);
316 add_event_entry(spu_cookie);
317 add_event_entry(offset);
318 spin_unlock_irqrestore(&buffer_lock, flags);
319 smp_wmb(); /* insure spu event buffer updates are written */
320 /* don't want entries intermingled... */
321out:
322 return retval;
323}
324
325/*
326 * This function is invoked on either a bind_context or unbind_context.
327 * If called for an unbind_context, the val arg is 0; otherwise,
328 * it is the object-id value for the spu context.
329 * The data arg is of type 'struct spu *'.
330 */
331static int spu_active_notify(struct notifier_block *self, unsigned long val,
332 void *data)
333{
334 int retval;
335 unsigned long flags;
336 struct spu *the_spu = data;
337
338 pr_debug("SPU event notification arrived\n");
339 if (!val) {
340 spin_lock_irqsave(&cache_lock, flags);
341 retval = release_cached_info(the_spu->number);
342 spin_unlock_irqrestore(&cache_lock, flags);
343 } else {
344 retval = process_context_switch(the_spu, val);
345 }
346 return retval;
347}
348
349static struct notifier_block spu_active = {
350 .notifier_call = spu_active_notify,
351};
352
353static int number_of_online_nodes(void)
354{
355 u32 cpu; u32 tmp;
356 int nodes = 0;
357 for_each_online_cpu(cpu) {
358 tmp = cbe_cpu_to_node(cpu) + 1;
359 if (tmp > nodes)
360 nodes++;
361 }
362 return nodes;
363}
364
365/* The main purpose of this function is to synchronize
366 * OProfile with SPUFS by registering to be notified of
367 * SPU task switches.
368 *
369 * NOTE: When profiling SPUs, we must ensure that only
370 * spu_sync_start is invoked and not the generic sync_start
371 * in drivers/oprofile/oprof.c. A return value of
372 * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
373 * accomplish this.
374 */
375int spu_sync_start(void)
376{
377 int k;
378 int ret = SKIP_GENERIC_SYNC;
379 int register_ret;
380 unsigned long flags = 0;
381
382 spu_prof_num_nodes = number_of_online_nodes();
383 num_spu_nodes = spu_prof_num_nodes * 8;
384
385 spin_lock_irqsave(&buffer_lock, flags);
386 add_event_entry(ESCAPE_CODE);
387 add_event_entry(SPU_PROFILING_CODE);
388 add_event_entry(num_spu_nodes);
389 spin_unlock_irqrestore(&buffer_lock, flags);
390
391 /* Register for SPU events */
392 register_ret = spu_switch_event_register(&spu_active);
393 if (register_ret) {
394 ret = SYNC_START_ERROR;
395 goto out;
396 }
397
398 for (k = 0; k < (MAX_NUMNODES * 8); k++)
399 last_guard_val[k] = 0;
400 pr_debug("spu_sync_start -- running.\n");
401out:
402 return ret;
403}
404
405/* Record SPU program counter samples to the oprofile event buffer. */
406void spu_sync_buffer(int spu_num, unsigned int *samples,
407 int num_samples)
408{
409 unsigned long long file_offset;
410 unsigned long flags;
411 int i;
412 struct vma_to_fileoffset_map *map;
413 struct spu *the_spu;
414 unsigned long long spu_num_ll = spu_num;
415 unsigned long long spu_num_shifted = spu_num_ll << 32;
416 struct cached_info *c_info;
417
418 /* We need to obtain the cache_lock here because it's
419 * possible that after getting the cached_info, the SPU job
420 * corresponding to this cached_info may end, thus resulting
421 * in the destruction of the cached_info.
422 */
423 spin_lock_irqsave(&cache_lock, flags);
424 c_info = get_cached_info(NULL, spu_num);
425 if (!c_info) {
426 /* This legitimately happens when the SPU task ends before all
427 * samples are recorded.
428 * No big deal -- so we just drop a few samples.
429 */
430 pr_debug("SPU_PROF: No cached SPU contex "
431 "for SPU #%d. Dropping samples.\n", spu_num);
432 goto out;
433 }
434
435 map = c_info->map;
436 the_spu = c_info->the_spu;
437 spin_lock(&buffer_lock);
438 for (i = 0; i < num_samples; i++) {
439 unsigned int sample = *(samples+i);
440 int grd_val = 0;
441 file_offset = 0;
442 if (sample == 0)
443 continue;
444 file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
445
446 /* If overlays are used by this SPU application, the guard
447 * value is non-zero, indicating which overlay section is in
448 * use. We need to discard samples taken during the time
449 * period which an overlay occurs (i.e., guard value changes).
450 */
451 if (grd_val && grd_val != last_guard_val[spu_num]) {
452 last_guard_val[spu_num] = grd_val;
453 /* Drop the rest of the samples. */
454 break;
455 }
456
457 add_event_entry(file_offset | spu_num_shifted);
458 }
459 spin_unlock(&buffer_lock);
460out:
461 spin_unlock_irqrestore(&cache_lock, flags);
462}
463
464
465int spu_sync_stop(void)
466{
467 unsigned long flags = 0;
468 int ret = spu_switch_event_unregister(&spu_active);
469 if (ret) {
470 printk(KERN_ERR "SPU_PROF: "
471 "%s, line %d: spu_switch_event_unregister returned %d\n",
472 __FUNCTION__, __LINE__, ret);
473 goto out;
474 }
475
476 spin_lock_irqsave(&cache_lock, flags);
477 ret = release_cached_info(RELEASE_ALL);
478 spin_unlock_irqrestore(&cache_lock, flags);
479out:
480 pr_debug("spu_sync_stop -- done.\n");
481 return ret;
482}
483
484
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
new file mode 100644
index 000000000000..76ec1d16aef7
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -0,0 +1,287 @@
1/*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Author: Maynard Johnson <maynardj@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* The code in this source file is responsible for generating
15 * vma-to-fileOffset maps for both overlay and non-overlay SPU
16 * applications.
17 */
18
19#include <linux/mm.h>
20#include <linux/string.h>
21#include <linux/uaccess.h>
22#include <linux/elf.h>
23#include "pr_util.h"
24
25
26void vma_map_free(struct vma_to_fileoffset_map *map)
27{
28 while (map) {
29 struct vma_to_fileoffset_map *next = map->next;
30 kfree(map);
31 map = next;
32 }
33}
34
35unsigned int
36vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
37 const struct spu *aSpu, int *grd_val)
38{
39 /*
40 * Default the offset to the physical address + a flag value.
41 * Addresses of dynamically generated code can't be found in the vma
42 * map. For those addresses the flagged value will be sent on to
43 * the user space tools so they can be reported rather than just
44 * thrown away.
45 */
46 u32 offset = 0x10000000 + vma;
47 u32 ovly_grd;
48
49 for (; map; map = map->next) {
50 if (vma < map->vma || vma >= map->vma + map->size)
51 continue;
52
53 if (map->guard_ptr) {
54 ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
55 if (ovly_grd != map->guard_val)
56 continue;
57 *grd_val = ovly_grd;
58 }
59 offset = vma - map->vma + map->offset;
60 break;
61 }
62
63 return offset;
64}
65
66static struct vma_to_fileoffset_map *
67vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
68 unsigned int size, unsigned int offset, unsigned int guard_ptr,
69 unsigned int guard_val)
70{
71 struct vma_to_fileoffset_map *new =
72 kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
73 if (!new) {
74 printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
75 __FUNCTION__, __LINE__);
76 vma_map_free(map);
77 return NULL;
78 }
79
80 new->next = map;
81 new->vma = vma;
82 new->size = size;
83 new->offset = offset;
84 new->guard_ptr = guard_ptr;
85 new->guard_val = guard_val;
86
87 return new;
88}
89
90
91/* Parse SPE ELF header and generate a list of vma_maps.
92 * A pointer to the first vma_map in the generated list
93 * of vma_maps is returned. */
94struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
95 unsigned long spu_elf_start)
96{
97 static const unsigned char expected[EI_PAD] = {
98 [EI_MAG0] = ELFMAG0,
99 [EI_MAG1] = ELFMAG1,
100 [EI_MAG2] = ELFMAG2,
101 [EI_MAG3] = ELFMAG3,
102 [EI_CLASS] = ELFCLASS32,
103 [EI_DATA] = ELFDATA2MSB,
104 [EI_VERSION] = EV_CURRENT,
105 [EI_OSABI] = ELFOSABI_NONE
106 };
107
108 int grd_val;
109 struct vma_to_fileoffset_map *map = NULL;
110 struct spu_overlay_info ovly;
111 unsigned int overlay_tbl_offset = -1;
112 unsigned long phdr_start, shdr_start;
113 Elf32_Ehdr ehdr;
114 Elf32_Phdr phdr;
115 Elf32_Shdr shdr, shdr_str;
116 Elf32_Sym sym;
117 int i, j;
118 char name[32];
119
120 unsigned int ovly_table_sym = 0;
121 unsigned int ovly_buf_table_sym = 0;
122 unsigned int ovly_table_end_sym = 0;
123 unsigned int ovly_buf_table_end_sym = 0;
124 unsigned long ovly_table;
125 unsigned int n_ovlys;
126
127 /* Get and validate ELF header. */
128
129 if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
130 goto fail;
131
132 if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
133 printk(KERN_ERR "SPU_PROF: "
134 "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
135 __FUNCTION__, __LINE__);
136 goto fail;
137 }
138 if (ehdr.e_machine != EM_SPU) {
139 printk(KERN_ERR "SPU_PROF: "
140 "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
141 __FUNCTION__, __LINE__);
142 goto fail;
143 }
144 if (ehdr.e_type != ET_EXEC) {
145 printk(KERN_ERR "SPU_PROF: "
146 "%s, line %d: Unexpected e_type parsing SPU ELF\n",
147 __FUNCTION__, __LINE__);
148 goto fail;
149 }
150 phdr_start = spu_elf_start + ehdr.e_phoff;
151 shdr_start = spu_elf_start + ehdr.e_shoff;
152
153 /* Traverse program headers. */
154 for (i = 0; i < ehdr.e_phnum; i++) {
155 if (copy_from_user(&phdr,
156 (void *) (phdr_start + i * sizeof(phdr)),
157 sizeof(phdr)))
158 goto fail;
159
160 if (phdr.p_type != PT_LOAD)
161 continue;
162 if (phdr.p_flags & (1 << 27))
163 continue;
164
165 map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
166 phdr.p_offset, 0, 0);
167 if (!map)
168 goto fail;
169 }
170
171 pr_debug("SPU_PROF: Created non-overlay maps\n");
172 /* Traverse section table and search for overlay-related symbols. */
173 for (i = 0; i < ehdr.e_shnum; i++) {
174 if (copy_from_user(&shdr,
175 (void *) (shdr_start + i * sizeof(shdr)),
176 sizeof(shdr)))
177 goto fail;
178
179 if (shdr.sh_type != SHT_SYMTAB)
180 continue;
181 if (shdr.sh_entsize != sizeof (sym))
182 continue;
183
184 if (copy_from_user(&shdr_str,
185 (void *) (shdr_start + shdr.sh_link *
186 sizeof(shdr)),
187 sizeof(shdr)))
188 goto fail;
189
190 if (shdr_str.sh_type != SHT_STRTAB)
191 goto fail;;
192
193 for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
194 if (copy_from_user(&sym, (void *) (spu_elf_start +
195 shdr.sh_offset + j *
196 sizeof (sym)),
197 sizeof (sym)))
198 goto fail;
199
200 if (copy_from_user(name, (void *)
201 (spu_elf_start + shdr_str.sh_offset +
202 sym.st_name),
203 20))
204 goto fail;
205
206 if (memcmp(name, "_ovly_table", 12) == 0)
207 ovly_table_sym = sym.st_value;
208 if (memcmp(name, "_ovly_buf_table", 16) == 0)
209 ovly_buf_table_sym = sym.st_value;
210 if (memcmp(name, "_ovly_table_end", 16) == 0)
211 ovly_table_end_sym = sym.st_value;
212 if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
213 ovly_buf_table_end_sym = sym.st_value;
214 }
215 }
216
217 /* If we don't have overlays, we're done. */
218 if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
219 || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
220 pr_debug("SPU_PROF: No overlay table found\n");
221 goto out;
222 } else {
223 pr_debug("SPU_PROF: Overlay table found\n");
224 }
225
226 /* The _ovly_table symbol represents a table with one entry
227 * per overlay section. The _ovly_buf_table symbol represents
228 * a table with one entry per overlay region.
229 * The struct spu_overlay_info gives the structure of the _ovly_table
230 * entries. The structure of _ovly_table_buf is simply one
231 * u32 word per entry.
232 */
233 overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
234 aSpu, &grd_val);
235 if (overlay_tbl_offset < 0) {
236 printk(KERN_ERR "SPU_PROF: "
237 "%s, line %d: Error finding SPU overlay table\n",
238 __FUNCTION__, __LINE__);
239 goto fail;
240 }
241 ovly_table = spu_elf_start + overlay_tbl_offset;
242
243 n_ovlys = (ovly_table_end_sym -
244 ovly_table_sym) / sizeof (ovly);
245
246 /* Traverse overlay table. */
247 for (i = 0; i < n_ovlys; i++) {
248 if (copy_from_user(&ovly, (void *)
249 (ovly_table + i * sizeof (ovly)),
250 sizeof (ovly)))
251 goto fail;
252
253 /* The ovly.vma/size/offset arguments are analogous to the same
254 * arguments used above for non-overlay maps. The final two
255 * args are referred to as the guard pointer and the guard
256 * value.
257 * The guard pointer is an entry in the _ovly_buf_table,
258 * computed using ovly.buf as the index into the table. Since
259 * ovly.buf values begin at '1' to reference the first (or 0th)
260 * entry in the _ovly_buf_table, the computation subtracts 1
261 * from ovly.buf.
262 * The guard value is stored in the _ovly_buf_table entry and
263 * is an index (starting at 1) back to the _ovly_table entry
264 * that is pointing at this _ovly_buf_table entry. So, for
265 * example, for an overlay scenario with one overlay segment
266 * and two overlay sections:
267 * - Section 1 points to the first entry of the
268 * _ovly_buf_table, which contains a guard value
269 * of '1', referencing the first (index=0) entry of
270 * _ovly_table.
271 * - Section 2 points to the second entry of the
272 * _ovly_buf_table, which contains a guard value
273 * of '2', referencing the second (index=1) entry of
274 * _ovly_table.
275 */
276 map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
277 ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
278 if (!map)
279 goto fail;
280 }
281 goto out;
282
283 fail:
284 map = NULL;
285 out:
286 return map;
287}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 1a7ef7e246d2..a28cce1d6c24 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
29static struct op_counter_config ctr[OP_MAX_COUNTER]; 29static struct op_counter_config ctr[OP_MAX_COUNTER];
30static struct op_system_config sys; 30static struct op_system_config sys;
31 31
32static int op_per_cpu_rc;
33
32static void op_handle_interrupt(struct pt_regs *regs) 34static void op_handle_interrupt(struct pt_regs *regs)
33{ 35{
34 model->handle_interrupt(regs, ctr); 36 model->handle_interrupt(regs, ctr);
@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
36 38
37static void op_powerpc_cpu_setup(void *dummy) 39static void op_powerpc_cpu_setup(void *dummy)
38{ 40{
39 model->cpu_setup(ctr); 41 int ret;
42
43 ret = model->cpu_setup(ctr);
44
45 if (ret != 0)
46 op_per_cpu_rc = ret;
40} 47}
41 48
42static int op_powerpc_setup(void) 49static int op_powerpc_setup(void)
43{ 50{
44 int err; 51 int err;
45 52
53 op_per_cpu_rc = 0;
54
46 /* Grab the hardware */ 55 /* Grab the hardware */
47 err = reserve_pmc_hardware(op_handle_interrupt); 56 err = reserve_pmc_hardware(op_handle_interrupt);
48 if (err) 57 if (err)
49 return err; 58 return err;
50 59
51 /* Pre-compute the values to stuff in the hardware registers. */ 60 /* Pre-compute the values to stuff in the hardware registers. */
52 model->reg_setup(ctr, &sys, model->num_counters); 61 op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
53 62
54 /* Configure the registers on all cpus. */ 63 if (op_per_cpu_rc)
64 goto out;
65
66 /* Configure the registers on all cpus. If an error occurs on one
67 * of the cpus, op_per_cpu_rc will be set to the error */
55 on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1); 68 on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
56 69
57 return 0; 70out: if (op_per_cpu_rc) {
71 /* error on setup release the performance counter hardware */
72 release_pmc_hardware();
73 }
74
75 return op_per_cpu_rc;
58} 76}
59 77
60static void op_powerpc_shutdown(void) 78static void op_powerpc_shutdown(void)
@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
64 82
65static void op_powerpc_cpu_start(void *dummy) 83static void op_powerpc_cpu_start(void *dummy)
66{ 84{
67 model->start(ctr); 85 /* If any of the cpus have return an error, set the
86 * global flag to the error so it can be returned
87 * to the generic OProfile caller.
88 */
89 int ret;
90
91 ret = model->start(ctr);
92 if (ret != 0)
93 op_per_cpu_rc = ret;
68} 94}
69 95
70static int op_powerpc_start(void) 96static int op_powerpc_start(void)
71{ 97{
98 op_per_cpu_rc = 0;
99
72 if (model->global_start) 100 if (model->global_start)
73 model->global_start(ctr); 101 return model->global_start(ctr);
74 if (model->start) 102 if (model->start) {
75 on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); 103 on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
76 return 0; 104 return op_per_cpu_rc;
105 }
106 return -EIO; /* No start function is defined for this
107 power architecture */
77} 108}
78 109
79static inline void op_powerpc_cpu_stop(void *dummy) 110static inline void op_powerpc_cpu_stop(void *dummy)
@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
147 178
148 switch (cur_cpu_spec->oprofile_type) { 179 switch (cur_cpu_spec->oprofile_type) {
149#ifdef CONFIG_PPC64 180#ifdef CONFIG_PPC64
150#ifdef CONFIG_PPC_CELL_NATIVE 181#ifdef CONFIG_OPROFILE_CELL
151 case PPC_OPROFILE_CELL: 182 case PPC_OPROFILE_CELL:
152 if (firmware_has_feature(FW_FEATURE_LPAR)) 183 if (firmware_has_feature(FW_FEATURE_LPAR))
153 return -ENODEV; 184 return -ENODEV;
154 model = &op_model_cell; 185 model = &op_model_cell;
186 ops->sync_start = model->sync_start;
187 ops->sync_stop = model->sync_stop;
155 break; 188 break;
156#endif 189#endif
157 case PPC_OPROFILE_RS64: 190 case PPC_OPROFILE_RS64:
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
index 5d1bbaf35ccb..cc599eb8768b 100644
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
81 81
82/* Configures the counters on this CPU based on the global 82/* Configures the counters on this CPU based on the global
83 * settings */ 83 * settings */
84static void fsl7450_cpu_setup(struct op_counter_config *ctr) 84static int fsl7450_cpu_setup(struct op_counter_config *ctr)
85{ 85{
86 /* freeze all counters */ 86 /* freeze all counters */
87 pmc_stop_ctrs(); 87 pmc_stop_ctrs();
@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
89 mtspr(SPRN_MMCR0, mmcr0_val); 89 mtspr(SPRN_MMCR0, mmcr0_val);
90 mtspr(SPRN_MMCR1, mmcr1_val); 90 mtspr(SPRN_MMCR1, mmcr1_val);
91 mtspr(SPRN_MMCR2, mmcr2_val); 91 mtspr(SPRN_MMCR2, mmcr2_val);
92
93 return 0;
92} 94}
93 95
94#define NUM_CTRS 6 96#define NUM_CTRS 6
95 97
96/* Configures the global settings for the countes on all CPUs. */ 98/* Configures the global settings for the countes on all CPUs. */
97static void fsl7450_reg_setup(struct op_counter_config *ctr, 99static int fsl7450_reg_setup(struct op_counter_config *ctr,
98 struct op_system_config *sys, 100 struct op_system_config *sys,
99 int num_ctrs) 101 int num_ctrs)
100{ 102{
@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
126 | mmcr1_event6(ctr[5].event); 128 | mmcr1_event6(ctr[5].event);
127 129
128 mmcr2_val = 0; 130 mmcr2_val = 0;
131
132 return 0;
129} 133}
130 134
131/* Sets the counters on this CPU to the chosen values, and starts them */ 135/* Sets the counters on this CPU to the chosen values, and starts them */
132static void fsl7450_start(struct op_counter_config *ctr) 136static int fsl7450_start(struct op_counter_config *ctr)
133{ 137{
134 int i; 138 int i;
135 139
@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
148 pmc_start_ctrs(); 152 pmc_start_ctrs();
149 153
150 oprofile_running = 1; 154 oprofile_running = 1;
155
156 return 0;
151} 157}
152 158
153/* Stop the counters on this CPU */ 159/* Stop the counters on this CPU */
@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
193 /* The freeze bit was set by the interrupt. */ 199 /* The freeze bit was set by the interrupt. */
194 /* Clear the freeze bit, and reenable the interrupt. 200 /* Clear the freeze bit, and reenable the interrupt.
195 * The counters won't actually start until the rfi clears 201 * The counters won't actually start until the rfi clears
196 * the PMM bit */ 202 * the PM/M bit */
197 pmc_start_ctrs(); 203 pmc_start_ctrs();
198} 204}
199 205
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index c29293befba9..d928b54f3a0f 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -5,8 +5,8 @@
5 * 5 *
6 * Author: David Erb (djerb@us.ibm.com) 6 * Author: David Erb (djerb@us.ibm.com)
7 * Modifications: 7 * Modifications:
8 * Carl Love <carll@us.ibm.com> 8 * Carl Love <carll@us.ibm.com>
9 * Maynard Johnson <maynardj@us.ibm.com> 9 * Maynard Johnson <maynardj@us.ibm.com>
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -38,12 +38,25 @@
38 38
39#include "../platforms/cell/interrupt.h" 39#include "../platforms/cell/interrupt.h"
40#include "../platforms/cell/cbe_regs.h" 40#include "../platforms/cell/cbe_regs.h"
41#include "cell/pr_util.h"
42
43static void cell_global_stop_spu(void);
44
45/*
46 * spu_cycle_reset is the number of cycles between samples.
47 * This variable is used for SPU profiling and should ONLY be set
48 * at the beginning of cell_reg_setup; otherwise, it's read-only.
49 */
50static unsigned int spu_cycle_reset;
51
52#define NUM_SPUS_PER_NODE 8
53#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
41 54
42#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ 55#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
43#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying 56#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
44 * PPU_CYCLES event 57 * PPU_CYCLES event
45 */ 58 */
46#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 59#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
47 60
48#define NUM_THREADS 2 /* number of physical threads in 61#define NUM_THREADS 2 /* number of physical threads in
49 * physical processor 62 * physical processor
@@ -51,6 +64,7 @@
51#define NUM_TRACE_BUS_WORDS 4 64#define NUM_TRACE_BUS_WORDS 4
52#define NUM_INPUT_BUS_WORDS 2 65#define NUM_INPUT_BUS_WORDS 2
53 66
67#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
54 68
55struct pmc_cntrl_data { 69struct pmc_cntrl_data {
56 unsigned long vcntr; 70 unsigned long vcntr;
@@ -62,11 +76,10 @@ struct pmc_cntrl_data {
62/* 76/*
63 * ibm,cbe-perftools rtas parameters 77 * ibm,cbe-perftools rtas parameters
64 */ 78 */
65
66struct pm_signal { 79struct pm_signal {
67 u16 cpu; /* Processor to modify */ 80 u16 cpu; /* Processor to modify */
68 u16 sub_unit; /* hw subunit this applies to (if applicable) */ 81 u16 sub_unit; /* hw subunit this applies to (if applicable)*/
69 short int signal_group; /* Signal Group to Enable/Disable */ 82 short int signal_group; /* Signal Group to Enable/Disable */
70 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event 83 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
71 * Bus Word(s) (bitmask) 84 * Bus Word(s) (bitmask)
72 */ 85 */
@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
112 125
113static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 126static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
114 127
115/* Interpetation of hdw_thread: 128/*
129 * The CELL profiling code makes rtas calls to setup the debug bus to
130 * route the performance signals. Additionally, SPU profiling requires
131 * a second rtas call to setup the hardware to capture the SPU PCs.
132 * The EIO error value is returned if the token lookups or the rtas
133 * call fail. The EIO error number is the best choice of the existing
134 * error numbers. The probability of rtas related error is very low. But
135 * by returning EIO and printing additional information to dmsg the user
136 * will know that OProfile did not start and dmesg will tell them why.
137 * OProfile does not support returning errors on Stop. Not a huge issue
138 * since failure to reset the debug bus or stop the SPU PC collection is
139 * not a fatel issue. Chances are if the Stop failed, Start doesn't work
140 * either.
141 */
142
143/*
144 * Interpetation of hdw_thread:
116 * 0 - even virtual cpus 0, 2, 4,... 145 * 0 - even virtual cpus 0, 2, 4,...
117 * 1 - odd virtual cpus 1, 3, 5, ... 146 * 1 - odd virtual cpus 1, 3, 5, ...
147 *
148 * FIXME: this is strictly wrong, we need to clean this up in a number
149 * of places. It works for now. -arnd
118 */ 150 */
119static u32 hdw_thread; 151static u32 hdw_thread;
120 152
121static u32 virt_cntr_inter_mask; 153static u32 virt_cntr_inter_mask;
122static struct timer_list timer_virt_cntr; 154static struct timer_list timer_virt_cntr;
123 155
124/* pm_signal needs to be global since it is initialized in 156/*
157 * pm_signal needs to be global since it is initialized in
125 * cell_reg_setup at the time when the necessary information 158 * cell_reg_setup at the time when the necessary information
126 * is available. 159 * is available.
127 */ 160 */
128static struct pm_signal pm_signal[NR_PHYS_CTRS]; 161static struct pm_signal pm_signal[NR_PHYS_CTRS];
129static int pm_rtas_token; 162static int pm_rtas_token; /* token for debug bus setup call */
163static int spu_rtas_token; /* token for SPU cycle profiling */
130 164
131static u32 reset_value[NR_PHYS_CTRS]; 165static u32 reset_value[NR_PHYS_CTRS];
132static int num_counters; 166static int num_counters;
@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru,
147{ 181{
148 u64 paddr = __pa(address); 182 u64 paddr = __pa(address);
149 183
150 return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, 184 return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
151 paddr >> 32, paddr & 0xffffffff, length); 185 passthru, paddr >> 32, paddr & 0xffffffff, length);
152} 186}
153 187
154static void pm_rtas_reset_signals(u32 node) 188static void pm_rtas_reset_signals(u32 node)
@@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node)
156 int ret; 190 int ret;
157 struct pm_signal pm_signal_local; 191 struct pm_signal pm_signal_local;
158 192
159 /* The debug bus is being set to the passthru disable state. 193 /*
160 * However, the FW still expects atleast one legal signal routing 194 * The debug bus is being set to the passthru disable state.
161 * entry or it will return an error on the arguments. If we don't 195 * However, the FW still expects atleast one legal signal routing
162 * supply a valid entry, we must ignore all return values. Ignoring 196 * entry or it will return an error on the arguments. If we don't
163 * all return values means we might miss an error we should be 197 * supply a valid entry, we must ignore all return values. Ignoring
164 * concerned about. 198 * all return values means we might miss an error we should be
199 * concerned about.
165 */ 200 */
166 201
167 /* fw expects physical cpu #. */ 202 /* fw expects physical cpu #. */
@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node)
175 &pm_signal_local, 210 &pm_signal_local,
176 sizeof(struct pm_signal)); 211 sizeof(struct pm_signal));
177 212
178 if (ret) 213 if (unlikely(ret))
214 /*
215 * Not a fatal error. For Oprofile stop, the oprofile
216 * functions do not support returning an error for
217 * failure to stop OProfile.
218 */
179 printk(KERN_WARNING "%s: rtas returned: %d\n", 219 printk(KERN_WARNING "%s: rtas returned: %d\n",
180 __FUNCTION__, ret); 220 __FUNCTION__, ret);
181} 221}
182 222
183static void pm_rtas_activate_signals(u32 node, u32 count) 223static int pm_rtas_activate_signals(u32 node, u32 count)
184{ 224{
185 int ret; 225 int ret;
186 int i, j; 226 int i, j;
187 struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 227 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
188 228
189 /* There is no debug setup required for the cycles event. 229 /*
230 * There is no debug setup required for the cycles event.
190 * Note that only events in the same group can be used. 231 * Note that only events in the same group can be used.
191 * Otherwise, there will be conflicts in correctly routing 232 * Otherwise, there will be conflicts in correctly routing
192 * the signals on the debug bus. It is the responsiblity 233 * the signals on the debug bus. It is the responsiblity
@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
213 pm_signal_local, 254 pm_signal_local,
214 i * sizeof(struct pm_signal)); 255 i * sizeof(struct pm_signal));
215 256
216 if (ret) 257 if (unlikely(ret)) {
217 printk(KERN_WARNING "%s: rtas returned: %d\n", 258 printk(KERN_WARNING "%s: rtas returned: %d\n",
218 __FUNCTION__, ret); 259 __FUNCTION__, ret);
260 return -EIO;
261 }
219 } 262 }
263
264 return 0;
220} 265}
221 266
222/* 267/*
@@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
260 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 305 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
261 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 306 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
262 307
263 /* Some of the islands signal selection is based on 64 bit words. 308 /*
309 * Some of the islands signal selection is based on 64 bit words.
264 * The debug bus words are 32 bits, the input words to the performance 310 * The debug bus words are 32 bits, the input words to the performance
265 * counters are defined as 32 bits. Need to convert the 64 bit island 311 * counters are defined as 32 bits. Need to convert the 64 bit island
266 * specification to the appropriate 32 input bit and bus word for the 312 * specification to the appropriate 32 input bit and bus word for the
267 * performance counter event selection. See the CELL Performance 313 * performance counter event selection. See the CELL Performance
268 * monitoring signals manual and the Perf cntr hardware descriptions 314 * monitoring signals manual and the Perf cntr hardware descriptions
269 * for the details. 315 * for the details.
270 */ 316 */
@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
298 input_bus[j] = i; 344 input_bus[j] = i;
299 pm_regs.group_control |= 345 pm_regs.group_control |=
300 (i << (31 - i)); 346 (i << (31 - i));
347
301 break; 348 break;
302 } 349 }
303 } 350 }
@@ -309,7 +356,8 @@ out:
309 356
310static void write_pm_cntrl(int cpu) 357static void write_pm_cntrl(int cpu)
311{ 358{
312 /* Oprofile will use 32 bit counters, set bits 7:10 to 0 359 /*
360 * Oprofile will use 32 bit counters, set bits 7:10 to 0
313 * pmregs.pm_cntrl is a global 361 * pmregs.pm_cntrl is a global
314 */ 362 */
315 363
@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu)
326 if (pm_regs.pm_cntrl.freeze == 1) 374 if (pm_regs.pm_cntrl.freeze == 1)
327 val |= CBE_PM_FREEZE_ALL_CTRS; 375 val |= CBE_PM_FREEZE_ALL_CTRS;
328 376
329 /* Routine set_count_mode must be called previously to set 377 /*
378 * Routine set_count_mode must be called previously to set
330 * the count mode based on the user selection of user and kernel. 379 * the count mode based on the user selection of user and kernel.
331 */ 380 */
332 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); 381 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu)
336static inline void 385static inline void
337set_count_mode(u32 kernel, u32 user) 386set_count_mode(u32 kernel, u32 user)
338{ 387{
339 /* The user must specify user and kernel if they want them. If 388 /*
389 * The user must specify user and kernel if they want them. If
340 * neither is specified, OProfile will count in hypervisor mode. 390 * neither is specified, OProfile will count in hypervisor mode.
341 * pm_regs.pm_cntrl is a global 391 * pm_regs.pm_cntrl is a global
342 */ 392 */
@@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
364 414
365/* 415/*
366 * Oprofile is expected to collect data on all CPUs simultaneously. 416 * Oprofile is expected to collect data on all CPUs simultaneously.
367 * However, there is one set of performance counters per node. There are 417 * However, there is one set of performance counters per node. There are
368 * two hardware threads or virtual CPUs on each node. Hence, OProfile must 418 * two hardware threads or virtual CPUs on each node. Hence, OProfile must
369 * multiplex in time the performance counter collection on the two virtual 419 * multiplex in time the performance counter collection on the two virtual
370 * CPUs. The multiplexing of the performance counters is done by this 420 * CPUs. The multiplexing of the performance counters is done by this
@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
377 * pair of per-cpu arrays is used for storing the previous and next 427 * pair of per-cpu arrays is used for storing the previous and next
378 * pmc values for a given node. 428 * pmc values for a given node.
379 * NOTE: We use the per-cpu variable to improve cache performance. 429 * NOTE: We use the per-cpu variable to improve cache performance.
430 *
431 * This routine will alternate loading the virtual counters for
432 * virtual CPUs
380 */ 433 */
381static void cell_virtual_cntr(unsigned long data) 434static void cell_virtual_cntr(unsigned long data)
382{ 435{
383 /* This routine will alternate loading the virtual counters for
384 * virtual CPUs
385 */
386 int i, prev_hdw_thread, next_hdw_thread; 436 int i, prev_hdw_thread, next_hdw_thread;
387 u32 cpu; 437 u32 cpu;
388 unsigned long flags; 438 unsigned long flags;
389 439
390 /* Make sure that the interrupt_hander and 440 /*
391 * the virt counter are not both playing with 441 * Make sure that the interrupt_hander and the virt counter are
392 * the counters on the same node. 442 * not both playing with the counters on the same node.
393 */ 443 */
394 444
395 spin_lock_irqsave(&virt_cntr_lock, flags); 445 spin_lock_irqsave(&virt_cntr_lock, flags);
@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data)
400 hdw_thread = 1 ^ hdw_thread; 450 hdw_thread = 1 ^ hdw_thread;
401 next_hdw_thread = hdw_thread; 451 next_hdw_thread = hdw_thread;
402 452
403 for (i = 0; i < num_counters; i++) 453 /*
404 /* There are some per thread events. Must do the 454 * There are some per thread events. Must do the
405 * set event, for the thread that is being started 455 * set event, for the thread that is being started
406 */ 456 */
457 for (i = 0; i < num_counters; i++)
407 set_pm_event(i, 458 set_pm_event(i,
408 pmc_cntrl[next_hdw_thread][i].evnts, 459 pmc_cntrl[next_hdw_thread][i].evnts,
409 pmc_cntrl[next_hdw_thread][i].masks); 460 pmc_cntrl[next_hdw_thread][i].masks);
410 461
411 /* The following is done only once per each node, but 462 /*
463 * The following is done only once per each node, but
412 * we need cpu #, not node #, to pass to the cbe_xxx functions. 464 * we need cpu #, not node #, to pass to the cbe_xxx functions.
413 */ 465 */
414 for_each_online_cpu(cpu) { 466 for_each_online_cpu(cpu) {
415 if (cbe_get_hw_thread_id(cpu)) 467 if (cbe_get_hw_thread_id(cpu))
416 continue; 468 continue;
417 469
418 /* stop counters, save counter values, restore counts 470 /*
471 * stop counters, save counter values, restore counts
419 * for previous thread 472 * for previous thread
420 */ 473 */
421 cbe_disable_pm(cpu); 474 cbe_disable_pm(cpu);
@@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data)
428 == 0xFFFFFFFF) 481 == 0xFFFFFFFF)
429 /* If the cntr value is 0xffffffff, we must 482 /* If the cntr value is 0xffffffff, we must
430 * reset that to 0xfffffff0 when the current 483 * reset that to 0xfffffff0 when the current
431 * thread is restarted. This will generate a 484 * thread is restarted. This will generate a
432 * new interrupt and make sure that we never 485 * new interrupt and make sure that we never
433 * restore the counters to the max value. If 486 * restore the counters to the max value. If
434 * the counters were restored to the max value, 487 * the counters were restored to the max value,
@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data)
444 next_hdw_thread)[i]); 497 next_hdw_thread)[i]);
445 } 498 }
446 499
447 /* Switch to the other thread. Change the interrupt 500 /*
501 * Switch to the other thread. Change the interrupt
448 * and control regs to be scheduled on the CPU 502 * and control regs to be scheduled on the CPU
449 * corresponding to the thread to execute. 503 * corresponding to the thread to execute.
450 */ 504 */
451 for (i = 0; i < num_counters; i++) { 505 for (i = 0; i < num_counters; i++) {
452 if (pmc_cntrl[next_hdw_thread][i].enabled) { 506 if (pmc_cntrl[next_hdw_thread][i].enabled) {
453 /* There are some per thread events. 507 /*
508 * There are some per thread events.
454 * Must do the set event, enable_cntr 509 * Must do the set event, enable_cntr
455 * for each cpu. 510 * for each cpu.
456 */ 511 */
@@ -482,17 +537,42 @@ static void start_virt_cntrs(void)
482} 537}
483 538
484/* This function is called once for all cpus combined */ 539/* This function is called once for all cpus combined */
485static void 540static int cell_reg_setup(struct op_counter_config *ctr,
486cell_reg_setup(struct op_counter_config *ctr, 541 struct op_system_config *sys, int num_ctrs)
487 struct op_system_config *sys, int num_ctrs)
488{ 542{
489 int i, j, cpu; 543 int i, j, cpu;
544 spu_cycle_reset = 0;
545
546 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
547 spu_cycle_reset = ctr[0].count;
548
549 /*
550 * Each node will need to make the rtas call to start
551 * and stop SPU profiling. Get the token once and store it.
552 */
553 spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
554
555 if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
556 printk(KERN_ERR
557 "%s: rtas token ibm,cbe-spu-perftools unknown\n",
558 __FUNCTION__);
559 return -EIO;
560 }
561 }
490 562
491 pm_rtas_token = rtas_token("ibm,cbe-perftools"); 563 pm_rtas_token = rtas_token("ibm,cbe-perftools");
492 if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { 564
493 printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", 565 /*
566 * For all events excetp PPU CYCLEs, each node will need to make
567 * the rtas cbe-perftools call to setup and reset the debug bus.
568 * Make the token lookup call once and store it in the global
569 * variable pm_rtas_token.
570 */
571 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
572 printk(KERN_ERR
573 "%s: rtas token ibm,cbe-perftools unknown\n",
494 __FUNCTION__); 574 __FUNCTION__);
495 goto out; 575 return -EIO;
496 } 576 }
497 577
498 num_counters = num_ctrs; 578 num_counters = num_ctrs;
@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr,
520 per_cpu(pmc_values, j)[i] = 0; 600 per_cpu(pmc_values, j)[i] = 0;
521 } 601 }
522 602
523 /* Setup the thread 1 events, map the thread 0 event to the 603 /*
604 * Setup the thread 1 events, map the thread 0 event to the
524 * equivalent thread 1 event. 605 * equivalent thread 1 event.
525 */ 606 */
526 for (i = 0; i < num_ctrs; ++i) { 607 for (i = 0; i < num_ctrs; ++i) {
@@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr,
544 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 625 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
545 input_bus[i] = 0xff; 626 input_bus[i] = 0xff;
546 627
547 /* Our counters count up, and "count" refers to 628 /*
629 * Our counters count up, and "count" refers to
548 * how much before the next interrupt, and we interrupt 630 * how much before the next interrupt, and we interrupt
549 * on overflow. So we calculate the starting value 631 * on overflow. So we calculate the starting value
550 * which will give us "count" until overflow. 632 * which will give us "count" until overflow.
551 * Then we set the events on the enabled counters. 633 * Then we set the events on the enabled counters.
552 */ 634 */
@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr,
569 for (i = 0; i < num_counters; ++i) { 651 for (i = 0; i < num_counters; ++i) {
570 per_cpu(pmc_values, cpu)[i] = reset_value[i]; 652 per_cpu(pmc_values, cpu)[i] = reset_value[i];
571 } 653 }
572out: 654
573 ; 655 return 0;
574} 656}
575 657
658
659
576/* This function is called once for each cpu */ 660/* This function is called once for each cpu */
577static void cell_cpu_setup(struct op_counter_config *cntr) 661static int cell_cpu_setup(struct op_counter_config *cntr)
578{ 662{
579 u32 cpu = smp_processor_id(); 663 u32 cpu = smp_processor_id();
580 u32 num_enabled = 0; 664 u32 num_enabled = 0;
581 int i; 665 int i;
582 666
667 if (spu_cycle_reset)
668 return 0;
669
583 /* There is one performance monitor per processor chip (i.e. node), 670 /* There is one performance monitor per processor chip (i.e. node),
584 * so we only need to perform this function once per node. 671 * so we only need to perform this function once per node.
585 */ 672 */
586 if (cbe_get_hw_thread_id(cpu)) 673 if (cbe_get_hw_thread_id(cpu))
587 goto out; 674 return 0;
588
589 if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
590 printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
591 __FUNCTION__);
592 goto out;
593 }
594 675
595 /* Stop all counters */ 676 /* Stop all counters */
596 cbe_disable_pm(cpu); 677 cbe_disable_pm(cpu);
@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
609 } 690 }
610 } 691 }
611 692
612 pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 693 /*
694 * The pm_rtas_activate_signals will return -EIO if the FW
695 * call failed.
696 */
697 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
698}
699
700#define ENTRIES 303
701#define MAXLFSR 0xFFFFFF
702
703/* precomputed table of 24 bit LFSR values */
704static int initial_lfsr[] = {
705 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
706 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
707 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
708 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
709 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
710 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
711 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
712 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
713 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
714 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
715 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
716 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
717 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
718 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
719 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
720 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
721 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
722 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
723 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
724 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
725 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
726 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
727 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
728 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
729 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
730 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
731 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
732 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
733 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
734 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
735 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
736 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
737 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
738 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
739 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
740 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
741 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
742 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
743};
744
745/*
746 * The hardware uses an LFSR counting sequence to determine when to capture
747 * the SPU PCs. An LFSR sequence is like a puesdo random number sequence
748 * where each number occurs once in the sequence but the sequence is not in
749 * numerical order. The SPU PC capture is done when the LFSR sequence reaches
750 * the last value in the sequence. Hence the user specified value N
751 * corresponds to the LFSR number that is N from the end of the sequence.
752 *
753 * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
754 * LFSR sequence is broken into four ranges. The spacing of the precomputed
755 * values is adjusted in each range so the error between the user specifed
756 * number (N) of events between samples and the actual number of events based
757 * on the precomputed value will be les then about 6.2%. Note, if the user
758 * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
759 * This is to prevent the loss of samples because the trace buffer is full.
760 *
761 * User specified N Step between Index in
762 * precomputed values precomputed
763 * table
764 * 0 to 2^16-1 ---- 0
765 * 2^16 to 2^16+2^19-1 2^12 1 to 128
766 * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256
767 * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302
768 *
769 *
770 * For example, the LFSR values in the second range are computed for 2^16,
771 * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indicies
772 * 1, 2,..., 127, 128.
773 *
774 * The 24 bit LFSR value for the nth number in the sequence can be
775 * calculated using the following code:
776 *
777 * #define size 24
778 * int calculate_lfsr(int n)
779 * {
780 * int i;
781 * unsigned int newlfsr0;
782 * unsigned int lfsr = 0xFFFFFF;
783 * unsigned int howmany = n;
784 *
785 * for (i = 2; i < howmany + 2; i++) {
786 * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
787 * ((lfsr >> (size - 1 - 1)) & 1) ^
788 * (((lfsr >> (size - 1 - 6)) & 1) ^
789 * ((lfsr >> (size - 1 - 23)) & 1)));
790 *
791 * lfsr >>= 1;
792 * lfsr = lfsr | (newlfsr0 << (size - 1));
793 * }
794 * return lfsr;
795 * }
796 */
797
798#define V2_16 (0x1 << 16)
799#define V2_19 (0x1 << 19)
800#define V2_22 (0x1 << 22)
801
802static int calculate_lfsr(int n)
803{
804 /*
805 * The ranges and steps are in powers of 2 so the calculations
806 * can be done using shifts rather then divide.
807 */
808 int index;
809
810 if ((n >> 16) == 0)
811 index = 0;
812 else if (((n - V2_16) >> 19) == 0)
813 index = ((n - V2_16) >> 12) + 1;
814 else if (((n - V2_16 - V2_19) >> 22) == 0)
815 index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
816 else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
817 index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
818 else
819 index = ENTRIES-1;
820
821 /* make sure index is valid */
822 if ((index > ENTRIES) || (index < 0))
823 index = ENTRIES-1;
824
825 return initial_lfsr[index];
826}
827
828static int pm_rtas_activate_spu_profiling(u32 node)
829{
830 int ret, i;
831 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
832
833 /*
834 * Set up the rtas call to configure the debug bus to
835 * route the SPU PCs. Setup the pm_signal for each SPU
836 */
837 for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
838 pm_signal_local[i].cpu = node;
839 pm_signal_local[i].signal_group = 41;
840 /* spu i on word (i/2) */
841 pm_signal_local[i].bus_word = 1 << i / 2;
842 /* spu i */
843 pm_signal_local[i].sub_unit = i;
844 pm_signal_local[i].bit = 63;
845 }
846
847 ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
848 PASSTHRU_ENABLE, pm_signal_local,
849 (NUM_SPUS_PER_NODE
850 * sizeof(struct pm_signal)));
851
852 if (unlikely(ret)) {
853 printk(KERN_WARNING "%s: rtas returned: %d\n",
854 __FUNCTION__, ret);
855 return -EIO;
856 }
857
858 return 0;
859}
860
861#ifdef CONFIG_CPU_FREQ
862static int
863oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
864{
865 int ret = 0;
866 struct cpufreq_freqs *frq = data;
867 if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
868 (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
869 (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
870 set_spu_profiling_frequency(frq->new, spu_cycle_reset);
871 return ret;
872}
873
874static struct notifier_block cpu_freq_notifier_block = {
875 .notifier_call = oprof_cpufreq_notify
876};
877#endif
878
879static int cell_global_start_spu(struct op_counter_config *ctr)
880{
881 int subfunc;
882 unsigned int lfsr_value;
883 int cpu;
884 int ret;
885 int rtas_error;
886 unsigned int cpu_khzfreq = 0;
887
888 /* The SPU profiling uses time-based profiling based on
889 * cpu frequency, so if configured with the CPU_FREQ
890 * option, we should detect frequency changes and react
891 * accordingly.
892 */
893#ifdef CONFIG_CPU_FREQ
894 ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
895 CPUFREQ_TRANSITION_NOTIFIER);
896 if (ret < 0)
897 /* this is not a fatal error */
898 printk(KERN_ERR "CPU freq change registration failed: %d\n",
899 ret);
900
901 else
902 cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
903#endif
904
905 set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
906
907 for_each_online_cpu(cpu) {
908 if (cbe_get_hw_thread_id(cpu))
909 continue;
910
911 /*
912 * Setup SPU cycle-based profiling.
913 * Set perf_mon_control bit 0 to a zero before
914 * enabling spu collection hardware.
915 */
916 cbe_write_pm(cpu, pm_control, 0);
917
918 if (spu_cycle_reset > MAX_SPU_COUNT)
919 /* use largest possible value */
920 lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
921 else
922 lfsr_value = calculate_lfsr(spu_cycle_reset);
923
924 /* must use a non zero value. Zero disables data collection. */
925 if (lfsr_value == 0)
926 lfsr_value = calculate_lfsr(1);
927
928 lfsr_value = lfsr_value << 8; /* shift lfsr to correct
929 * register location
930 */
931
932 /* debug bus setup */
933 ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
934
935 if (unlikely(ret)) {
936 rtas_error = ret;
937 goto out;
938 }
939
940
941 subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
942
943 /* start profiling */
944 ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
945 cbe_cpu_to_node(cpu), lfsr_value);
946
947 if (unlikely(ret != 0)) {
948 printk(KERN_ERR
949 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
950 __FUNCTION__, ret);
951 rtas_error = -EIO;
952 goto out;
953 }
954 }
955
956 rtas_error = start_spu_profiling(spu_cycle_reset);
957 if (rtas_error)
958 goto out_stop;
959
960 oprofile_running = 1;
961 return 0;
962
963out_stop:
964 cell_global_stop_spu(); /* clean up the PMU/debug bus */
613out: 965out:
614 ; 966 return rtas_error;
615} 967}
616 968
617static void cell_global_start(struct op_counter_config *ctr) 969static int cell_global_start_ppu(struct op_counter_config *ctr)
618{ 970{
619 u32 cpu; 971 u32 cpu, i;
620 u32 interrupt_mask = 0; 972 u32 interrupt_mask = 0;
621 u32 i;
622 973
623 /* This routine gets called once for the system. 974 /* This routine gets called once for the system.
624 * There is one performance monitor per node, so we 975 * There is one performance monitor per node, so we
@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr)
651 oprofile_running = 1; 1002 oprofile_running = 1;
652 smp_wmb(); 1003 smp_wmb();
653 1004
654 /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being 1005 /*
655 * executed which manipulates the PMU. We start the "virtual counter" 1006 * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
1007 * executed which manipulates the PMU. We start the "virtual counter"
656 * here so that we do not need to synchronize access to the PMU in 1008 * here so that we do not need to synchronize access to the PMU in
657 * the above for-loop. 1009 * the above for-loop.
658 */ 1010 */
659 start_virt_cntrs(); 1011 start_virt_cntrs();
1012
1013 return 0;
660} 1014}
661 1015
662static void cell_global_stop(void) 1016static int cell_global_start(struct op_counter_config *ctr)
1017{
1018 if (spu_cycle_reset)
1019 return cell_global_start_spu(ctr);
1020 else
1021 return cell_global_start_ppu(ctr);
1022}
1023
1024/*
1025 * Note the generic OProfile stop calls do not support returning
1026 * an error on stop. Hence, will not return an error if the FW
1027 * calls fail on stop. Failure to reset the debug bus is not an issue.
1028 * Failure to disable the SPU profiling is not an issue. The FW calls
1029 * to enable the performance counters and debug bus will work even if
1030 * the hardware was not cleanly reset.
1031 */
1032static void cell_global_stop_spu(void)
1033{
1034 int subfunc, rtn_value;
1035 unsigned int lfsr_value;
1036 int cpu;
1037
1038 oprofile_running = 0;
1039
1040#ifdef CONFIG_CPU_FREQ
1041 cpufreq_unregister_notifier(&cpu_freq_notifier_block,
1042 CPUFREQ_TRANSITION_NOTIFIER);
1043#endif
1044
1045 for_each_online_cpu(cpu) {
1046 if (cbe_get_hw_thread_id(cpu))
1047 continue;
1048
1049 subfunc = 3; /*
1050 * 2 - activate SPU tracing,
1051 * 3 - deactivate
1052 */
1053 lfsr_value = 0x8f100000;
1054
1055 rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
1056 subfunc, cbe_cpu_to_node(cpu),
1057 lfsr_value);
1058
1059 if (unlikely(rtn_value != 0)) {
1060 printk(KERN_ERR
1061 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
1062 __FUNCTION__, rtn_value);
1063 }
1064
1065 /* Deactivate the signals */
1066 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1067 }
1068
1069 stop_spu_profiling();
1070}
1071
1072static void cell_global_stop_ppu(void)
663{ 1073{
664 int cpu; 1074 int cpu;
665 1075
666 /* This routine will be called once for the system. 1076 /*
1077 * This routine will be called once for the system.
667 * There is one performance monitor per node, so we 1078 * There is one performance monitor per node, so we
668 * only need to perform this function once per node. 1079 * only need to perform this function once per node.
669 */ 1080 */
@@ -687,8 +1098,16 @@ static void cell_global_stop(void)
687 } 1098 }
688} 1099}
689 1100
690static void 1101static void cell_global_stop(void)
691cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) 1102{
1103 if (spu_cycle_reset)
1104 cell_global_stop_spu();
1105 else
1106 cell_global_stop_ppu();
1107}
1108
1109static void cell_handle_interrupt(struct pt_regs *regs,
1110 struct op_counter_config *ctr)
692{ 1111{
693 u32 cpu; 1112 u32 cpu;
694 u64 pc; 1113 u64 pc;
@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
699 1118
700 cpu = smp_processor_id(); 1119 cpu = smp_processor_id();
701 1120
702 /* Need to make sure the interrupt handler and the virt counter 1121 /*
1122 * Need to make sure the interrupt handler and the virt counter
703 * routine are not running at the same time. See the 1123 * routine are not running at the same time. See the
704 * cell_virtual_cntr() routine for additional comments. 1124 * cell_virtual_cntr() routine for additional comments.
705 */ 1125 */
706 spin_lock_irqsave(&virt_cntr_lock, flags); 1126 spin_lock_irqsave(&virt_cntr_lock, flags);
707 1127
708 /* Need to disable and reenable the performance counters 1128 /*
1129 * Need to disable and reenable the performance counters
709 * to get the desired behavior from the hardware. This 1130 * to get the desired behavior from the hardware. This
710 * is hardware specific. 1131 * is hardware specific.
711 */ 1132 */
@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
714 1135
715 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 1136 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
716 1137
717 /* If the interrupt mask has been cleared, then the virt cntr 1138 /*
1139 * If the interrupt mask has been cleared, then the virt cntr
718 * has cleared the interrupt. When the thread that generated 1140 * has cleared the interrupt. When the thread that generated
719 * the interrupt is restored, the data count will be restored to 1141 * the interrupt is restored, the data count will be restored to
720 * 0xffffff0 to cause the interrupt to be regenerated. 1142 * 0xffffff0 to cause the interrupt to be regenerated.
@@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
732 } 1154 }
733 } 1155 }
734 1156
735 /* The counters were frozen by the interrupt. 1157 /*
1158 * The counters were frozen by the interrupt.
736 * Reenable the interrupt and restart the counters. 1159 * Reenable the interrupt and restart the counters.
737 * If there was a race between the interrupt handler and 1160 * If there was a race between the interrupt handler and
738 * the virtual counter routine. The virutal counter 1161 * the virtual counter routine. The virutal counter
739 * routine may have cleared the interrupts. Hence must 1162 * routine may have cleared the interrupts. Hence must
740 * use the virt_cntr_inter_mask to re-enable the interrupts. 1163 * use the virt_cntr_inter_mask to re-enable the interrupts.
741 */ 1164 */
742 cbe_enable_pm_interrupts(cpu, hdw_thread, 1165 cbe_enable_pm_interrupts(cpu, hdw_thread,
743 virt_cntr_inter_mask); 1166 virt_cntr_inter_mask);
744 1167
745 /* The writes to the various performance counters only writes 1168 /*
746 * to a latch. The new values (interrupt setting bits, reset 1169 * The writes to the various performance counters only writes
1170 * to a latch. The new values (interrupt setting bits, reset
747 * counter value etc.) are not copied to the actual registers 1171 * counter value etc.) are not copied to the actual registers
748 * until the performance monitor is enabled. In order to get 1172 * until the performance monitor is enabled. In order to get
749 * this to work as desired, the permormance monitor needs to 1173 * this to work as desired, the permormance monitor needs to
@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
755 spin_unlock_irqrestore(&virt_cntr_lock, flags); 1179 spin_unlock_irqrestore(&virt_cntr_lock, flags);
756} 1180}
757 1181
1182/*
1183 * This function is called from the generic OProfile
1184 * driver. When profiling PPUs, we need to do the
1185 * generic sync start; otherwise, do spu_sync_start.
1186 */
1187static int cell_sync_start(void)
1188{
1189 if (spu_cycle_reset)
1190 return spu_sync_start();
1191 else
1192 return DO_GENERIC_SYNC;
1193}
1194
1195static int cell_sync_stop(void)
1196{
1197 if (spu_cycle_reset)
1198 return spu_sync_stop();
1199 else
1200 return 1;
1201}
1202
758struct op_powerpc_model op_model_cell = { 1203struct op_powerpc_model op_model_cell = {
759 .reg_setup = cell_reg_setup, 1204 .reg_setup = cell_reg_setup,
760 .cpu_setup = cell_cpu_setup, 1205 .cpu_setup = cell_cpu_setup,
761 .global_start = cell_global_start, 1206 .global_start = cell_global_start,
762 .global_stop = cell_global_stop, 1207 .global_stop = cell_global_stop,
1208 .sync_start = cell_sync_start,
1209 .sync_stop = cell_sync_stop,
763 .handle_interrupt = cell_handle_interrupt, 1210 .handle_interrupt = cell_handle_interrupt,
764}; 1211};
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c
index 2267eb8c661b..183a28bb1812 100644
--- a/arch/powerpc/oprofile/op_model_fsl_booke.c
+++ b/arch/powerpc/oprofile/op_model_fsl_booke.c
@@ -244,7 +244,7 @@ static void dump_pmcs(void)
244 mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); 244 mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
245} 245}
246 246
247static void fsl_booke_cpu_setup(struct op_counter_config *ctr) 247static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
248{ 248{
249 int i; 249 int i;
250 250
@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
258 258
259 set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); 259 set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
260 } 260 }
261
262 return 0;
261} 263}
262 264
263static void fsl_booke_reg_setup(struct op_counter_config *ctr, 265static int fsl_booke_reg_setup(struct op_counter_config *ctr,
264 struct op_system_config *sys, 266 struct op_system_config *sys,
265 int num_ctrs) 267 int num_ctrs)
266{ 268{
@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
276 for (i = 0; i < num_counters; ++i) 278 for (i = 0; i < num_counters; ++i)
277 reset_value[i] = 0x80000000UL - ctr[i].count; 279 reset_value[i] = 0x80000000UL - ctr[i].count;
278 280
281 return 0;
279} 282}
280 283
281static void fsl_booke_start(struct op_counter_config *ctr) 284static int fsl_booke_start(struct op_counter_config *ctr)
282{ 285{
283 int i; 286 int i;
284 287
@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
308 311
309 pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), 312 pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
310 mfpmr(PMRN_PMGC0)); 313 mfpmr(PMRN_PMGC0));
314
315 return 0;
311} 316}
312 317
313static void fsl_booke_stop(void) 318static void fsl_booke_stop(void)
diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c
index e8a56b0adadc..c40de461fd4e 100644
--- a/arch/powerpc/oprofile/op_model_pa6t.c
+++ b/arch/powerpc/oprofile/op_model_pa6t.c
@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
89 89
90 90
91/* precompute the values to stuff in the hardware registers */ 91/* precompute the values to stuff in the hardware registers */
92static void pa6t_reg_setup(struct op_counter_config *ctr, 92static int pa6t_reg_setup(struct op_counter_config *ctr,
93 struct op_system_config *sys, 93 struct op_system_config *sys,
94 int num_ctrs) 94 int num_ctrs)
95{ 95{
@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
135 pr_debug("reset_value for pmc%u inited to 0x%lx\n", 135 pr_debug("reset_value for pmc%u inited to 0x%lx\n",
136 pmc, reset_value[pmc]); 136 pmc, reset_value[pmc]);
137 } 137 }
138
139 return 0;
138} 140}
139 141
140/* configure registers on this cpu */ 142/* configure registers on this cpu */
141static void pa6t_cpu_setup(struct op_counter_config *ctr) 143static int pa6t_cpu_setup(struct op_counter_config *ctr)
142{ 144{
143 u64 mmcr0 = mmcr0_val; 145 u64 mmcr0 = mmcr0_val;
144 u64 mmcr1 = mmcr1_val; 146 u64 mmcr1 = mmcr1_val;
@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
154 mfspr(SPRN_PA6T_MMCR0)); 156 mfspr(SPRN_PA6T_MMCR0));
155 pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), 157 pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
156 mfspr(SPRN_PA6T_MMCR1)); 158 mfspr(SPRN_PA6T_MMCR1));
159
160 return 0;
157} 161}
158 162
159static void pa6t_start(struct op_counter_config *ctr) 163static int pa6t_start(struct op_counter_config *ctr)
160{ 164{
161 int i; 165 int i;
162 166
@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
174 oprofile_running = 1; 178 oprofile_running = 1;
175 179
176 pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); 180 pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
181
182 return 0;
177} 183}
178 184
179static void pa6t_stop(void) 185static void pa6t_stop(void)
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index a7c206b665af..cddc250a6a5c 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -32,7 +32,7 @@ static u32 mmcr0_val;
32static u64 mmcr1_val; 32static u64 mmcr1_val;
33static u64 mmcra_val; 33static u64 mmcra_val;
34 34
35static void power4_reg_setup(struct op_counter_config *ctr, 35static int power4_reg_setup(struct op_counter_config *ctr,
36 struct op_system_config *sys, 36 struct op_system_config *sys,
37 int num_ctrs) 37 int num_ctrs)
38{ 38{
@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
60 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; 60 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
61 else 61 else
62 mmcr0_val |= MMCR0_PROBLEM_DISABLE; 62 mmcr0_val |= MMCR0_PROBLEM_DISABLE;
63
64 return 0;
63} 65}
64 66
65extern void ppc64_enable_pmcs(void); 67extern void ppc64_enable_pmcs(void);
@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
84 return 0; 86 return 0;
85} 87}
86 88
87static void power4_cpu_setup(struct op_counter_config *ctr) 89static int power4_cpu_setup(struct op_counter_config *ctr)
88{ 90{
89 unsigned int mmcr0 = mmcr0_val; 91 unsigned int mmcr0 = mmcr0_val;
90 unsigned long mmcra = mmcra_val; 92 unsigned long mmcra = mmcra_val;
@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
111 mfspr(SPRN_MMCR1)); 113 mfspr(SPRN_MMCR1));
112 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), 114 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
113 mfspr(SPRN_MMCRA)); 115 mfspr(SPRN_MMCRA));
116
117 return 0;
114} 118}
115 119
116static void power4_start(struct op_counter_config *ctr) 120static int power4_start(struct op_counter_config *ctr)
117{ 121{
118 int i; 122 int i;
119 unsigned int mmcr0; 123 unsigned int mmcr0;
@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
148 oprofile_running = 1; 152 oprofile_running = 1;
149 153
150 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); 154 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
155 return 0;
151} 156}
152 157
153static void power4_stop(void) 158static void power4_stop(void)
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c
index c731acbfb2a5..a20afe45d936 100644
--- a/arch/powerpc/oprofile/op_model_rs64.c
+++ b/arch/powerpc/oprofile/op_model_rs64.c
@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
88 88
89static int num_counters; 89static int num_counters;
90 90
91static void rs64_reg_setup(struct op_counter_config *ctr, 91static int rs64_reg_setup(struct op_counter_config *ctr,
92 struct op_system_config *sys, 92 struct op_system_config *sys,
93 int num_ctrs) 93 int num_ctrs)
94{ 94{
@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
100 reset_value[i] = 0x80000000UL - ctr[i].count; 100 reset_value[i] = 0x80000000UL - ctr[i].count;
101 101
102 /* XXX setup user and kernel profiling */ 102 /* XXX setup user and kernel profiling */
103 return 0;
103} 104}
104 105
105static void rs64_cpu_setup(struct op_counter_config *ctr) 106static int rs64_cpu_setup(struct op_counter_config *ctr)
106{ 107{
107 unsigned int mmcr0; 108 unsigned int mmcr0;
108 109
@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
125 mfspr(SPRN_MMCR0)); 126 mfspr(SPRN_MMCR0));
126 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), 127 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
127 mfspr(SPRN_MMCR1)); 128 mfspr(SPRN_MMCR1));
129
130 return 0;
128} 131}
129 132
130static void rs64_start(struct op_counter_config *ctr) 133static int rs64_start(struct op_counter_config *ctr)
131{ 134{
132 int i; 135 int i;
133 unsigned int mmcr0; 136 unsigned int mmcr0;
@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
155 mtspr(SPRN_MMCR0, mmcr0); 158 mtspr(SPRN_MMCR0, mmcr0);
156 159
157 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); 160 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
161 return 0;
158} 162}
159 163
160static void rs64_stop(void) 164static void rs64_stop(void)
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index a7efb999d65e..6694f86d7000 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -22,6 +22,7 @@
22 22
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/module.h>
25#include <linux/slab.h> 26#include <linux/slab.h>
26#include <asm/atomic.h> 27#include <asm/atomic.h>
27#include <asm/spu.h> 28#include <asm/spu.h>
@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
81 spu_fini_csa(&ctx->csa); 82 spu_fini_csa(&ctx->csa);
82 if (ctx->gang) 83 if (ctx->gang)
83 spu_gang_remove_ctx(ctx->gang, ctx); 84 spu_gang_remove_ctx(ctx->gang, ctx);
85 if (ctx->prof_priv_kref)
86 kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
84 BUG_ON(!list_empty(&ctx->rq)); 87 BUG_ON(!list_empty(&ctx->rq));
85 atomic_dec(&nr_spu_contexts); 88 atomic_dec(&nr_spu_contexts);
86 kfree(ctx); 89 kfree(ctx);
@@ -185,3 +188,20 @@ void spu_release_saved(struct spu_context *ctx)
185 188
186 spu_release(ctx); 189 spu_release(ctx);
187} 190}
191
192void spu_set_profile_private_kref(struct spu_context *ctx,
193 struct kref *prof_info_kref,
194 void ( * prof_info_release) (struct kref *kref))
195{
196 ctx->prof_priv_kref = prof_info_kref;
197 ctx->prof_priv_release = prof_info_release;
198}
199EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
200
201void *spu_get_profile_private_kref(struct spu_context *ctx)
202{
203 return ctx->prof_priv_kref;
204}
205EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
206
207
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 88ec333e90d3..44e2338a05d5 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -274,6 +274,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
274 ctx->spu = spu; 274 ctx->spu = spu;
275 ctx->ops = &spu_hw_ops; 275 ctx->ops = &spu_hw_ops;
276 spu->pid = current->pid; 276 spu->pid = current->pid;
277 spu->tgid = current->tgid;
277 spu_associate_mm(spu, ctx->owner); 278 spu_associate_mm(spu, ctx->owner);
278 spu->ibox_callback = spufs_ibox_callback; 279 spu->ibox_callback = spufs_ibox_callback;
279 spu->wbox_callback = spufs_wbox_callback; 280 spu->wbox_callback = spufs_wbox_callback;
@@ -456,6 +457,7 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
456 spu->dma_callback = NULL; 457 spu->dma_callback = NULL;
457 spu_associate_mm(spu, NULL); 458 spu_associate_mm(spu, NULL);
458 spu->pid = 0; 459 spu->pid = 0;
460 spu->tgid = 0;
459 ctx->ops = &spu_backing_ops; 461 ctx->ops = &spu_backing_ops;
460 spu->flags = 0; 462 spu->flags = 0;
461 spu->ctx = NULL; 463 spu->ctx = NULL;
@@ -737,7 +739,7 @@ void spu_deactivate(struct spu_context *ctx)
737} 739}
738 740
739/** 741/**
740 * spu_yield - yield a physical spu if others are waiting 742 * spu_yield - yield a physical spu if others are waiting
741 * @ctx: spu context to yield 743 * @ctx: spu context to yield
742 * 744 *
743 * Check if there is a higher priority context waiting and if yes 745 * Check if there is a higher priority context waiting and if yes
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 692dbd0edc37..8b20c0c1556f 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -85,6 +85,8 @@ struct spu_context {
85 85
86 struct list_head gang_list; 86 struct list_head gang_list;
87 struct spu_gang *gang; 87 struct spu_gang *gang;
88 struct kref *prof_priv_kref;
89 void ( * prof_priv_release) (struct kref *kref);
88 90
89 /* owner thread */ 91 /* owner thread */
90 pid_t tid; 92 pid_t tid;