diff options
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/configs/cell_defconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 1 | ||||
-rw-r--r-- | arch/powerpc/oprofile/Kconfig | 7 | ||||
-rw-r--r-- | arch/powerpc/oprofile/Makefile | 4 | ||||
-rw-r--r-- | arch/powerpc/oprofile/cell/pr_util.h | 97 | ||||
-rw-r--r-- | arch/powerpc/oprofile/cell/spu_profiler.c | 221 | ||||
-rw-r--r-- | arch/powerpc/oprofile/cell/spu_task_sync.c | 484 | ||||
-rw-r--r-- | arch/powerpc/oprofile/cell/vma_map.c | 287 | ||||
-rw-r--r-- | arch/powerpc/oprofile/common.c | 51 | ||||
-rw-r--r-- | arch/powerpc/oprofile/op_model_7450.c | 14 | ||||
-rw-r--r-- | arch/powerpc/oprofile/op_model_cell.c | 607 | ||||
-rw-r--r-- | arch/powerpc/oprofile/op_model_fsl_booke.c | 11 | ||||
-rw-r--r-- | arch/powerpc/oprofile/op_model_pa6t.c | 12 | ||||
-rw-r--r-- | arch/powerpc/oprofile/op_model_power4.c | 11 | ||||
-rw-r--r-- | arch/powerpc/oprofile/op_model_rs64.c | 10 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/context.c | 20 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spufs.h | 2 |
18 files changed, 1738 insertions, 108 deletions
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 74f83f4a4e5e..d9ac24e8de16 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig | |||
@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y | |||
1455 | # Instrumentation Support | 1455 | # Instrumentation Support |
1456 | # | 1456 | # |
1457 | CONFIG_PROFILING=y | 1457 | CONFIG_PROFILING=y |
1458 | CONFIG_OPROFILE=y | 1458 | CONFIG_OPROFILE=m |
1459 | CONFIG_OPROFILE_CELL=y | ||
1459 | # CONFIG_KPROBES is not set | 1460 | # CONFIG_KPROBES is not set |
1460 | 1461 | ||
1461 | # | 1462 | # |
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e5df167f7824..727a6699f2f4 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -122,6 +122,7 @@ extern struct timezone sys_tz; | |||
122 | static long timezone_offset; | 122 | static long timezone_offset; |
123 | 123 | ||
124 | unsigned long ppc_proc_freq; | 124 | unsigned long ppc_proc_freq; |
125 | EXPORT_SYMBOL(ppc_proc_freq); | ||
125 | unsigned long ppc_tb_freq; | 126 | unsigned long ppc_tb_freq; |
126 | 127 | ||
127 | static u64 tb_last_jiffy __cacheline_aligned_in_smp; | 128 | static u64 tb_last_jiffy __cacheline_aligned_in_smp; |
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig index eb2dece76a54..7089e79689b9 100644 --- a/arch/powerpc/oprofile/Kconfig +++ b/arch/powerpc/oprofile/Kconfig | |||
@@ -15,3 +15,10 @@ config OPROFILE | |||
15 | 15 | ||
16 | If unsure, say N. | 16 | If unsure, say N. |
17 | 17 | ||
18 | config OPROFILE_CELL | ||
19 | bool "OProfile for Cell Broadband Engine" | ||
20 | depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m) | ||
21 | default y | ||
22 | help | ||
23 | Profiling of Cell BE SPUs requires special support enabled | ||
24 | by this option. | ||
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 4b5f9528218c..c5f64c3bd668 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile | |||
@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ | |||
11 | timer_int.o ) | 11 | timer_int.o ) |
12 | 12 | ||
13 | oprofile-y := $(DRIVER_OBJS) common.o backtrace.o | 13 | oprofile-y := $(DRIVER_OBJS) common.o backtrace.o |
14 | oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o | 14 | oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ |
15 | cell/spu_profiler.o cell/vma_map.o \ | ||
16 | cell/spu_task_sync.o | ||
15 | oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o | 17 | oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o |
16 | oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o | 18 | oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o |
17 | oprofile-$(CONFIG_6xx) += op_model_7450.o | 19 | oprofile-$(CONFIG_6xx) += op_model_7450.o |
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h new file mode 100644 index 000000000000..e5704f00c8b4 --- /dev/null +++ b/arch/powerpc/oprofile/cell/pr_util.h | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Cell Broadband Engine OProfile Support | ||
3 | * | ||
4 | * (C) Copyright IBM Corporation 2006 | ||
5 | * | ||
6 | * Author: Maynard Johnson <maynardj@us.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #ifndef PR_UTIL_H | ||
15 | #define PR_UTIL_H | ||
16 | |||
17 | #include <linux/cpumask.h> | ||
18 | #include <linux/oprofile.h> | ||
19 | #include <asm/cell-pmu.h> | ||
20 | #include <asm/spu.h> | ||
21 | |||
22 | #include "../../platforms/cell/cbe_regs.h" | ||
23 | |||
24 | /* Defines used for sync_start */ | ||
25 | #define SKIP_GENERIC_SYNC 0 | ||
26 | #define SYNC_START_ERROR -1 | ||
27 | #define DO_GENERIC_SYNC 1 | ||
28 | |||
29 | struct spu_overlay_info { /* map of sections within an SPU overlay */ | ||
30 | unsigned int vma; /* SPU virtual memory address from elf */ | ||
31 | unsigned int size; /* size of section from elf */ | ||
32 | unsigned int offset; /* offset of section into elf file */ | ||
33 | unsigned int buf; /* NOTE(review): presumably the overlay buffer number ("ovly.buf", starting at 1) used to index _ovly_buf_table -- confirm */ | ||
34 | }; | ||
35 | |||
36 | struct vma_to_fileoffset_map { /* map of sections within an SPU program */ | ||
37 | struct vma_to_fileoffset_map *next; /* list pointer */ | ||
38 | unsigned int vma; /* SPU virtual memory address from elf */ | ||
39 | unsigned int size; /* size of section from elf */ | ||
40 | unsigned int offset; /* offset of section into elf file */ | ||
41 | unsigned int guard_ptr; /* offset of the guard word within the SPU local store; 0 means no guard check */ | ||
42 | unsigned int guard_val; /* value the guard word must hold for this entry to match */ | ||
43 | /* | ||
44 | * The guard pointer is an entry in the _ovly_buf_table, | ||
45 | * computed using ovly.buf as the index into the table. Since | ||
46 | * ovly.buf values begin at '1' to reference the first (or 0th) | ||
47 | * entry in the _ovly_buf_table, the computation subtracts 1 | ||
48 | * from ovly.buf. | ||
49 | * The guard value is stored in the _ovly_buf_table entry and | ||
50 | * is an index (starting at 1) back to the _ovly_table entry | ||
51 | * that is pointing at this _ovly_buf_table entry. So, for | ||
52 | * example, for an overlay scenario with one overlay segment | ||
53 | * and two overlay sections: | ||
54 | * - Section 1 points to the first entry of the | ||
55 | * _ovly_buf_table, which contains a guard value | ||
56 | * of '1', referencing the first (index=0) entry of | ||
57 | * _ovly_table. | ||
58 | * - Section 2 points to the second entry of the | ||
59 | * _ovly_buf_table, which contains a guard value | ||
60 | * of '2', referencing the second (index=1) entry of | ||
61 | * _ovly_table. | ||
62 | */ | ||
63 | |||
64 | }; | ||
65 | |||
66 | /* The three functions below are for maintaining and accessing | ||
67 | * the vma-to-fileoffset map. | ||
68 | */ | ||
69 | struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu, | ||
70 | u64 objectid); | ||
71 | unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map, | ||
72 | unsigned int vma, const struct spu *aSpu, | ||
73 | int *grd_val); | ||
74 | void vma_map_free(struct vma_to_fileoffset_map *map); | ||
75 | |||
76 | /* | ||
77 | * Entry point for SPU profiling. | ||
78 | * cycles_reset is the SPU_CYCLES count value specified by the user. | ||
79 | */ | ||
80 | int start_spu_profiling(unsigned int cycles_reset); | ||
81 | |||
82 | void stop_spu_profiling(void); | ||
83 | |||
84 | |||
85 | /* add the necessary profiling hooks */ | ||
86 | int spu_sync_start(void); | ||
87 | |||
88 | /* remove the hooks */ | ||
89 | int spu_sync_stop(void); | ||
90 | |||
91 | /* Record SPU program counter samples to the oprofile event buffer. */ | ||
92 | void spu_sync_buffer(int spu_num, unsigned int *samples, | ||
93 | int num_samples); | ||
94 | |||
95 | void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset); | ||
96 | |||
97 | #endif /* PR_UTIL_H */ | ||
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c new file mode 100644 index 000000000000..380d7e217531 --- /dev/null +++ b/arch/powerpc/oprofile/cell/spu_profiler.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Cell Broadband Engine OProfile Support | ||
3 | * | ||
4 | * (C) Copyright IBM Corporation 2006 | ||
5 | * | ||
6 | * Authors: Maynard Johnson <maynardj@us.ibm.com> | ||
7 | * Carl Love <carll@us.ibm.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/hrtimer.h> | ||
16 | #include <linux/smp.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <asm/cell-pmu.h> | ||
19 | #include "pr_util.h" | ||
20 | |||
21 | #define TRACE_ARRAY_SIZE 1024 | ||
22 | #define SCALE_SHIFT 14 | ||
23 | |||
24 | static u32 *samples; | ||
25 | |||
26 | static int spu_prof_running; | ||
27 | static unsigned int profiling_interval; | ||
28 | |||
29 | #define NUM_SPU_BITS_TRBUF 16 | ||
30 | #define SPUS_PER_TB_ENTRY 4 | ||
31 | #define SPUS_PER_NODE 8 | ||
32 | |||
33 | #define SPU_PC_MASK 0xFFFF | ||
34 | |||
35 | static DEFINE_SPINLOCK(sample_array_lock); | ||
36 | unsigned long sample_array_lock_flags; | ||
37 | |||
38 | void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) | ||
39 | { | ||
40 | unsigned long ns_per_cyc; | ||
41 | |||
42 | if (!freq_khz) | ||
43 | freq_khz = ppc_proc_freq/1000; | ||
44 | |||
45 | /* To calculate a timeout in nanoseconds, the basic | ||
46 | * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency). | ||
47 | * To avoid floating point math, we use the scale math | ||
48 | * technique as described in linux/jiffies.h. We use | ||
49 | * a scale factor of SCALE_SHIFT, which provides 4 decimal places | ||
50 | * of precision. This is close enough for the purpose at hand. | ||
51 | * | ||
52 | * The value of the timeout should be small enough that the hw | ||
53 | * trace buffer will not get more then about 1/3 full for the | ||
54 | * maximum user specified (the LFSR value) hw sampling frequency. | ||
55 | * This is to ensure the trace buffer will never fill even if the | ||
56 | * kernel thread scheduling varies under a heavy system load. | ||
57 | */ | ||
58 | |||
59 | ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz; | ||
60 | profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT; | ||
61 | |||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Extract SPU PC from trace buffer entry | ||
66 | */ | ||
67 | static void spu_pc_extract(int cpu, int entry) | ||
68 | { | ||
69 | /* the trace buffer is 128 bits */ | ||
70 | u64 trace_buffer[2]; | ||
71 | u64 spu_mask; | ||
72 | int spu; | ||
73 | |||
74 | spu_mask = SPU_PC_MASK; | ||
75 | |||
76 | /* Each SPU PC is 16 bits; hence, four spus in each of | ||
77 | * the two 64-bit buffer entries that make up the | ||
78 | * 128-bit trace_buffer entry. Process two 64-bit values | ||
79 | * simultaneously. | ||
80 | * trace[0] SPU PC contents are: 0 1 2 3 | ||
81 | * trace[1] SPU PC contents are: 4 5 6 7 | ||
82 | */ | ||
83 | |||
84 | cbe_read_trace_buffer(cpu, trace_buffer); | ||
85 | |||
86 | for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) { | ||
87 | /* spu PC trace entry is upper 16 bits of the | ||
88 | * 18 bit SPU program counter | ||
89 | */ | ||
90 | samples[spu * TRACE_ARRAY_SIZE + entry] | ||
91 | = (spu_mask & trace_buffer[0]) << 2; | ||
92 | samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry] | ||
93 | = (spu_mask & trace_buffer[1]) << 2; | ||
94 | |||
95 | trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF; | ||
96 | trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static int cell_spu_pc_collection(int cpu) | ||
101 | { | ||
102 | u32 trace_addr; | ||
103 | int entry; | ||
104 | |||
105 | /* process the collected SPU PC for the node */ | ||
106 | |||
107 | entry = 0; | ||
108 | |||
109 | trace_addr = cbe_read_pm(cpu, trace_address); | ||
110 | while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { | ||
111 | /* there is data in the trace buffer to process */ | ||
112 | spu_pc_extract(cpu, entry); | ||
113 | |||
114 | entry++; | ||
115 | |||
116 | if (entry >= TRACE_ARRAY_SIZE) | ||
117 | /* spu_samples is full */ | ||
118 | break; | ||
119 | |||
120 | trace_addr = cbe_read_pm(cpu, trace_address); | ||
121 | } | ||
122 | |||
123 | return entry; | ||
124 | } | ||
125 | |||
126 | |||
127 | static enum hrtimer_restart profile_spus(struct hrtimer *timer) | ||
128 | { | ||
129 | ktime_t kt; | ||
130 | int cpu, node, k, num_samples, spu_num; | ||
131 | |||
132 | if (!spu_prof_running) | ||
133 | goto stop; | ||
134 | |||
135 | for_each_online_cpu(cpu) { | ||
136 | if (cbe_get_hw_thread_id(cpu)) | ||
137 | continue; | ||
138 | |||
139 | node = cbe_cpu_to_node(cpu); | ||
140 | |||
141 | /* There should only be one kernel thread at a time processing | ||
142 | * the samples. In the very unlikely case that the processing | ||
143 | * is taking a very long time and multiple kernel threads are | ||
144 | * started to process the samples. Make sure only one kernel | ||
145 | * thread is working on the samples array at a time. The | ||
146 | * sample array must be loaded and then processed for a given | ||
147 | * cpu. The sample array is not per cpu. | ||
148 | */ | ||
149 | spin_lock_irqsave(&sample_array_lock, | ||
150 | sample_array_lock_flags); | ||
151 | num_samples = cell_spu_pc_collection(cpu); | ||
152 | |||
153 | if (num_samples == 0) { | ||
154 | spin_unlock_irqrestore(&sample_array_lock, | ||
155 | sample_array_lock_flags); | ||
156 | continue; | ||
157 | } | ||
158 | |||
159 | for (k = 0; k < SPUS_PER_NODE; k++) { | ||
160 | spu_num = k + (node * SPUS_PER_NODE); | ||
161 | spu_sync_buffer(spu_num, | ||
162 | samples + (k * TRACE_ARRAY_SIZE), | ||
163 | num_samples); | ||
164 | } | ||
165 | |||
166 | spin_unlock_irqrestore(&sample_array_lock, | ||
167 | sample_array_lock_flags); | ||
168 | |||
169 | } | ||
170 | smp_wmb(); /* insure spu event buffer updates are written */ | ||
171 | /* don't want events intermingled... */ | ||
172 | |||
173 | kt = ktime_set(0, profiling_interval); | ||
174 | if (!spu_prof_running) | ||
175 | goto stop; | ||
176 | hrtimer_forward(timer, timer->base->get_time(), kt); | ||
177 | return HRTIMER_RESTART; | ||
178 | |||
179 | stop: | ||
180 | printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n"); | ||
181 | return HRTIMER_NORESTART; | ||
182 | } | ||
183 | |||
184 | static struct hrtimer timer; | ||
185 | /* | ||
186 | * Entry point for SPU profiling. | ||
187 | * NOTE: SPU profiling is done system-wide, not per-CPU. | ||
188 | * | ||
189 | * cycles_reset is the count value specified by the user when | ||
190 | * setting up OProfile to count SPU_CYCLES. | ||
191 | */ | ||
192 | int start_spu_profiling(unsigned int cycles_reset) | ||
193 | { | ||
194 | ktime_t kt; | ||
195 | |||
196 | pr_debug("timer resolution: %lu\n", TICK_NSEC); | ||
197 | kt = ktime_set(0, profiling_interval); | ||
198 | hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
199 | timer.expires = kt; | ||
200 | timer.function = profile_spus; | ||
201 | |||
202 | /* Allocate arrays for collecting SPU PC samples */ | ||
203 | samples = kzalloc(SPUS_PER_NODE * | ||
204 | TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL); | ||
205 | |||
206 | if (!samples) | ||
207 | return -ENOMEM; | ||
208 | |||
209 | spu_prof_running = 1; | ||
210 | hrtimer_start(&timer, kt, HRTIMER_MODE_REL); | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | void stop_spu_profiling(void) | ||
216 | { | ||
217 | spu_prof_running = 0; | ||
218 | hrtimer_cancel(&timer); | ||
219 | kfree(samples); | ||
220 | pr_debug("SPU_PROF: stop_spu_profiling issued\n"); | ||
221 | } | ||
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c new file mode 100644 index 000000000000..133665754a75 --- /dev/null +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c | |||
@@ -0,0 +1,484 @@ | |||
1 | /* | ||
2 | * Cell Broadband Engine OProfile Support | ||
3 | * | ||
4 | * (C) Copyright IBM Corporation 2006 | ||
5 | * | ||
6 | * Author: Maynard Johnson <maynardj@us.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | /* The purpose of this file is to handle SPU event task switching | ||
15 | * and to record SPU context information into the OProfile | ||
16 | * event buffer. | ||
17 | * | ||
18 | * Additionally, the spu_sync_buffer function is provided as a helper | ||
19 | * for recording actual SPU program counter samples to the event buffer. | ||
20 | */ | ||
21 | #include <linux/dcookies.h> | ||
22 | #include <linux/kref.h> | ||
23 | #include <linux/mm.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <linux/notifier.h> | ||
26 | #include <linux/numa.h> | ||
27 | #include <linux/oprofile.h> | ||
28 | #include <linux/spinlock.h> | ||
29 | #include "pr_util.h" | ||
30 | |||
31 | #define RELEASE_ALL 9999 | ||
32 | |||
33 | static DEFINE_SPINLOCK(buffer_lock); | ||
34 | static DEFINE_SPINLOCK(cache_lock); | ||
35 | static int num_spu_nodes; | ||
36 | int spu_prof_num_nodes; | ||
37 | int last_guard_val[MAX_NUMNODES * 8]; | ||
38 | |||
39 | /* Container for caching information about an active SPU task. */ | ||
40 | struct cached_info { | ||
41 | struct vma_to_fileoffset_map *map; | ||
42 | struct spu *the_spu; /* needed to access pointer to local_store */ | ||
43 | struct kref cache_ref; | ||
44 | }; | ||
45 | |||
46 | static struct cached_info *spu_info[MAX_NUMNODES * 8]; | ||
47 | |||
48 | static void destroy_cached_info(struct kref *kref) | ||
49 | { | ||
50 | struct cached_info *info; | ||
51 | |||
52 | info = container_of(kref, struct cached_info, cache_ref); | ||
53 | vma_map_free(info->map); | ||
54 | kfree(info); | ||
55 | module_put(THIS_MODULE); | ||
56 | } | ||
57 | |||
58 | /* Return the cached_info for the passed SPU number. | ||
59 | * ATTENTION: Callers are responsible for obtaining the | ||
60 | * cache_lock if needed prior to invoking this function. | ||
61 | */ | ||
62 | static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num) | ||
63 | { | ||
64 | struct kref *ref; | ||
65 | struct cached_info *ret_info; | ||
66 | |||
67 | if (spu_num >= num_spu_nodes) { | ||
68 | printk(KERN_ERR "SPU_PROF: " | ||
69 | "%s, line %d: Invalid index %d into spu info cache\n", | ||
70 | __FUNCTION__, __LINE__, spu_num); | ||
71 | ret_info = NULL; | ||
72 | goto out; | ||
73 | } | ||
74 | if (!spu_info[spu_num] && the_spu) { | ||
75 | ref = spu_get_profile_private_kref(the_spu->ctx); | ||
76 | if (ref) { | ||
77 | spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref); | ||
78 | kref_get(&spu_info[spu_num]->cache_ref); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | ret_info = spu_info[spu_num]; | ||
83 | out: | ||
84 | return ret_info; | ||
85 | } | ||
86 | |||
87 | |||
88 | /* Looks for cached info for the passed spu. If not found, the | ||
89 | * cached info is created for the passed spu. | ||
90 | * Returns 0 for success; otherwise, -1 for error. | ||
91 | */ | ||
92 | static int | ||
93 | prepare_cached_spu_info(struct spu *spu, unsigned long objectId) | ||
94 | { | ||
95 | unsigned long flags; | ||
96 | struct vma_to_fileoffset_map *new_map; | ||
97 | int retval = 0; | ||
98 | struct cached_info *info; | ||
99 | |||
100 | /* We won't bother getting cache_lock here since | ||
101 | * don't do anything with the cached_info that's returned. | ||
102 | */ | ||
103 | info = get_cached_info(spu, spu->number); | ||
104 | |||
105 | if (info) { | ||
106 | pr_debug("Found cached SPU info.\n"); | ||
107 | goto out; | ||
108 | } | ||
109 | |||
110 | /* Create cached_info and set spu_info[spu->number] to point to it. | ||
111 | * spu->number is a system-wide value, not a per-node value. | ||
112 | */ | ||
113 | info = kzalloc(sizeof(struct cached_info), GFP_KERNEL); | ||
114 | if (!info) { | ||
115 | printk(KERN_ERR "SPU_PROF: " | ||
116 | "%s, line %d: create vma_map failed\n", | ||
117 | __FUNCTION__, __LINE__); | ||
118 | retval = -ENOMEM; | ||
119 | goto err_alloc; | ||
120 | } | ||
121 | new_map = create_vma_map(spu, objectId); | ||
122 | if (!new_map) { | ||
123 | printk(KERN_ERR "SPU_PROF: " | ||
124 | "%s, line %d: create vma_map failed\n", | ||
125 | __FUNCTION__, __LINE__); | ||
126 | retval = -ENOMEM; | ||
127 | goto err_alloc; | ||
128 | } | ||
129 | |||
130 | pr_debug("Created vma_map\n"); | ||
131 | info->map = new_map; | ||
132 | info->the_spu = spu; | ||
133 | kref_init(&info->cache_ref); | ||
134 | spin_lock_irqsave(&cache_lock, flags); | ||
135 | spu_info[spu->number] = info; | ||
136 | /* Increment count before passing off ref to SPUFS. */ | ||
137 | kref_get(&info->cache_ref); | ||
138 | |||
139 | /* We increment the module refcount here since SPUFS is | ||
140 | * responsible for the final destruction of the cached_info, | ||
141 | * and it must be able to access the destroy_cached_info() | ||
142 | * function defined in the OProfile module. We decrement | ||
143 | * the module refcount in destroy_cached_info. | ||
144 | */ | ||
145 | try_module_get(THIS_MODULE); | ||
146 | spu_set_profile_private_kref(spu->ctx, &info->cache_ref, | ||
147 | destroy_cached_info); | ||
148 | spin_unlock_irqrestore(&cache_lock, flags); | ||
149 | goto out; | ||
150 | |||
151 | err_alloc: | ||
152 | kfree(info); | ||
153 | out: | ||
154 | return retval; | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * NOTE: The caller is responsible for locking the | ||
159 | * cache_lock prior to calling this function. | ||
160 | */ | ||
161 | static int release_cached_info(int spu_index) | ||
162 | { | ||
163 | int index, end; | ||
164 | |||
165 | if (spu_index == RELEASE_ALL) { | ||
166 | end = num_spu_nodes; | ||
167 | index = 0; | ||
168 | } else { | ||
169 | if (spu_index >= num_spu_nodes) { | ||
170 | printk(KERN_ERR "SPU_PROF: " | ||
171 | "%s, line %d: " | ||
172 | "Invalid index %d into spu info cache\n", | ||
173 | __FUNCTION__, __LINE__, spu_index); | ||
174 | goto out; | ||
175 | } | ||
176 | end = spu_index + 1; | ||
177 | index = spu_index; | ||
178 | } | ||
179 | for (; index < end; index++) { | ||
180 | if (spu_info[index]) { | ||
181 | kref_put(&spu_info[index]->cache_ref, | ||
182 | destroy_cached_info); | ||
183 | spu_info[index] = NULL; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | out: | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | /* The source code for fast_get_dcookie was "borrowed" | ||
192 | * from drivers/oprofile/buffer_sync.c. | ||
193 | */ | ||
194 | |||
195 | /* Optimisation. We can manage without taking the dcookie sem | ||
196 | * because we cannot reach this code without at least one | ||
197 | * dcookie user still being registered (namely, the reader | ||
198 | * of the event buffer). | ||
199 | */ | ||
200 | static inline unsigned long fast_get_dcookie(struct dentry *dentry, | ||
201 | struct vfsmount *vfsmnt) | ||
202 | { | ||
203 | unsigned long cookie; | ||
204 | |||
205 | if (dentry->d_cookie) | ||
206 | return (unsigned long)dentry; | ||
207 | get_dcookie(dentry, vfsmnt, &cookie); | ||
208 | return cookie; | ||
209 | } | ||
210 | |||
211 | /* Look up the dcookie for the task's first VM_EXECUTABLE mapping, | ||
212 | * which corresponds loosely to "application name". Also, determine | ||
213 | * the offset for the SPU ELF object. If computed offset is | ||
214 | * non-zero, it implies an embedded SPU object; otherwise, it's a | ||
215 | * separate SPU binary, in which case we retrieve it's dcookie. | ||
216 | * For the embedded case, we must determine if SPU ELF is embedded | ||
217 | * in the executable application or another file (i.e., shared lib). | ||
218 | * If embedded in a shared lib, we must get the dcookie and return | ||
219 | * that to the caller. | ||
220 | */ | ||
221 | static unsigned long | ||
222 | get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, | ||
223 | unsigned long *spu_bin_dcookie, | ||
224 | unsigned long spu_ref) | ||
225 | { | ||
226 | unsigned long app_cookie = 0; | ||
227 | unsigned int my_offset = 0; | ||
228 | struct file *app = NULL; | ||
229 | struct vm_area_struct *vma; | ||
230 | struct mm_struct *mm = spu->mm; | ||
231 | |||
232 | if (!mm) | ||
233 | goto out; | ||
234 | |||
235 | down_read(&mm->mmap_sem); | ||
236 | |||
237 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | ||
238 | if (!vma->vm_file) | ||
239 | continue; | ||
240 | if (!(vma->vm_flags & VM_EXECUTABLE)) | ||
241 | continue; | ||
242 | app_cookie = fast_get_dcookie(vma->vm_file->f_dentry, | ||
243 | vma->vm_file->f_vfsmnt); | ||
244 | pr_debug("got dcookie for %s\n", | ||
245 | vma->vm_file->f_dentry->d_name.name); | ||
246 | app = vma->vm_file; | ||
247 | break; | ||
248 | } | ||
249 | |||
250 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | ||
251 | if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref) | ||
252 | continue; | ||
253 | my_offset = spu_ref - vma->vm_start; | ||
254 | if (!vma->vm_file) | ||
255 | goto fail_no_image_cookie; | ||
256 | |||
257 | pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n", | ||
258 | my_offset, spu_ref, | ||
259 | vma->vm_file->f_dentry->d_name.name); | ||
260 | *offsetp = my_offset; | ||
261 | break; | ||
262 | } | ||
263 | |||
264 | *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry, | ||
265 | vma->vm_file->f_vfsmnt); | ||
266 | pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name); | ||
267 | |||
268 | up_read(&mm->mmap_sem); | ||
269 | |||
270 | out: | ||
271 | return app_cookie; | ||
272 | |||
273 | fail_no_image_cookie: | ||
274 | up_read(&mm->mmap_sem); | ||
275 | |||
276 | printk(KERN_ERR "SPU_PROF: " | ||
277 | "%s, line %d: Cannot find dcookie for SPU binary\n", | ||
278 | __FUNCTION__, __LINE__); | ||
279 | goto out; | ||
280 | } | ||
281 | |||
282 | |||
283 | |||
284 | /* This function finds or creates cached context information for the | ||
285 | * passed SPU and records SPU context information into the OProfile | ||
286 | * event buffer. | ||
287 | */ | ||
288 | static int process_context_switch(struct spu *spu, unsigned long objectId) | ||
289 | { | ||
290 | unsigned long flags; | ||
291 | int retval; | ||
292 | unsigned int offset = 0; | ||
293 | unsigned long spu_cookie = 0, app_dcookie; | ||
294 | |||
295 | retval = prepare_cached_spu_info(spu, objectId); | ||
296 | if (retval) | ||
297 | goto out; | ||
298 | |||
299 | /* Get dcookie first because a mutex_lock is taken in that | ||
300 | * code path, so interrupts must not be disabled. | ||
301 | */ | ||
302 | app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId); | ||
303 | if (!app_dcookie || !spu_cookie) { | ||
304 | retval = -ENOENT; | ||
305 | goto out; | ||
306 | } | ||
307 | |||
308 | /* Record context info in event buffer */ | ||
309 | spin_lock_irqsave(&buffer_lock, flags); | ||
310 | add_event_entry(ESCAPE_CODE); | ||
311 | add_event_entry(SPU_CTX_SWITCH_CODE); | ||
312 | add_event_entry(spu->number); | ||
313 | add_event_entry(spu->pid); | ||
314 | add_event_entry(spu->tgid); | ||
315 | add_event_entry(app_dcookie); | ||
316 | add_event_entry(spu_cookie); | ||
317 | add_event_entry(offset); | ||
318 | spin_unlock_irqrestore(&buffer_lock, flags); | ||
319 | smp_wmb(); /* insure spu event buffer updates are written */ | ||
320 | /* don't want entries intermingled... */ | ||
321 | out: | ||
322 | return retval; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * This function is invoked on either a bind_context or unbind_context. | ||
327 | * If called for an unbind_context, the val arg is 0; otherwise, | ||
328 | * it is the object-id value for the spu context. | ||
329 | * The data arg is of type 'struct spu *'. | ||
330 | */ | ||
331 | static int spu_active_notify(struct notifier_block *self, unsigned long val, | ||
332 | void *data) | ||
333 | { | ||
334 | int retval; | ||
335 | unsigned long flags; | ||
336 | struct spu *the_spu = data; | ||
337 | |||
338 | pr_debug("SPU event notification arrived\n"); | ||
339 | if (!val) { | ||
340 | spin_lock_irqsave(&cache_lock, flags); | ||
341 | retval = release_cached_info(the_spu->number); | ||
342 | spin_unlock_irqrestore(&cache_lock, flags); | ||
343 | } else { | ||
344 | retval = process_context_switch(the_spu, val); | ||
345 | } | ||
346 | return retval; | ||
347 | } | ||
348 | |||
349 | static struct notifier_block spu_active = { | ||
350 | .notifier_call = spu_active_notify, | ||
351 | }; | ||
352 | |||
353 | static int number_of_online_nodes(void) | ||
354 | { | ||
355 | u32 cpu; u32 tmp; | ||
356 | int nodes = 0; | ||
357 | for_each_online_cpu(cpu) { | ||
358 | tmp = cbe_cpu_to_node(cpu) + 1; | ||
359 | if (tmp > nodes) | ||
360 | nodes++; | ||
361 | } | ||
362 | return nodes; | ||
363 | } | ||
364 | |||
365 | /* The main purpose of this function is to synchronize | ||
366 | * OProfile with SPUFS by registering to be notified of | ||
367 | * SPU task switches. | ||
368 | * | ||
369 | * NOTE: When profiling SPUs, we must ensure that only | ||
370 | * spu_sync_start is invoked and not the generic sync_start | ||
371 | * in drivers/oprofile/oprof.c. A return value of | ||
372 | * SKIP_GENERIC_SYNC or SYNC_START_ERROR will | ||
373 | * accomplish this. | ||
374 | */ | ||
375 | int spu_sync_start(void) | ||
376 | { | ||
377 | int k; | ||
378 | int ret = SKIP_GENERIC_SYNC; | ||
379 | int register_ret; | ||
380 | unsigned long flags = 0; | ||
381 | |||
382 | spu_prof_num_nodes = number_of_online_nodes(); | ||
383 | num_spu_nodes = spu_prof_num_nodes * 8; | ||
384 | |||
385 | spin_lock_irqsave(&buffer_lock, flags); | ||
386 | add_event_entry(ESCAPE_CODE); | ||
387 | add_event_entry(SPU_PROFILING_CODE); | ||
388 | add_event_entry(num_spu_nodes); | ||
389 | spin_unlock_irqrestore(&buffer_lock, flags); | ||
390 | |||
391 | /* Register for SPU events */ | ||
392 | register_ret = spu_switch_event_register(&spu_active); | ||
393 | if (register_ret) { | ||
394 | ret = SYNC_START_ERROR; | ||
395 | goto out; | ||
396 | } | ||
397 | |||
398 | for (k = 0; k < (MAX_NUMNODES * 8); k++) | ||
399 | last_guard_val[k] = 0; | ||
400 | pr_debug("spu_sync_start -- running.\n"); | ||
401 | out: | ||
402 | return ret; | ||
403 | } | ||
404 | |||
405 | /* Record SPU program counter samples to the oprofile event buffer. */ | ||
406 | void spu_sync_buffer(int spu_num, unsigned int *samples, | ||
407 | int num_samples) | ||
408 | { | ||
409 | unsigned long long file_offset; | ||
410 | unsigned long flags; | ||
411 | int i; | ||
412 | struct vma_to_fileoffset_map *map; | ||
413 | struct spu *the_spu; | ||
414 | unsigned long long spu_num_ll = spu_num; | ||
415 | unsigned long long spu_num_shifted = spu_num_ll << 32; | ||
416 | struct cached_info *c_info; | ||
417 | |||
418 | /* We need to obtain the cache_lock here because it's | ||
419 | * possible that after getting the cached_info, the SPU job | ||
420 | * corresponding to this cached_info may end, thus resulting | ||
421 | * in the destruction of the cached_info. | ||
422 | */ | ||
423 | spin_lock_irqsave(&cache_lock, flags); | ||
424 | c_info = get_cached_info(NULL, spu_num); | ||
425 | if (!c_info) { | ||
426 | /* This legitimately happens when the SPU task ends before all | ||
427 | * samples are recorded. | ||
428 | * No big deal -- so we just drop a few samples. | ||
429 | */ | ||
430 | pr_debug("SPU_PROF: No cached SPU contex " | ||
431 | "for SPU #%d. Dropping samples.\n", spu_num); | ||
432 | goto out; | ||
433 | } | ||
434 | |||
435 | map = c_info->map; | ||
436 | the_spu = c_info->the_spu; | ||
437 | spin_lock(&buffer_lock); | ||
438 | for (i = 0; i < num_samples; i++) { | ||
439 | unsigned int sample = *(samples+i); | ||
440 | int grd_val = 0; | ||
441 | file_offset = 0; | ||
442 | if (sample == 0) | ||
443 | continue; | ||
444 | file_offset = vma_map_lookup( map, sample, the_spu, &grd_val); | ||
445 | |||
446 | /* If overlays are used by this SPU application, the guard | ||
447 | * value is non-zero, indicating which overlay section is in | ||
448 | * use. We need to discard samples taken during the time | ||
449 | * period which an overlay occurs (i.e., guard value changes). | ||
450 | */ | ||
451 | if (grd_val && grd_val != last_guard_val[spu_num]) { | ||
452 | last_guard_val[spu_num] = grd_val; | ||
453 | /* Drop the rest of the samples. */ | ||
454 | break; | ||
455 | } | ||
456 | |||
457 | add_event_entry(file_offset | spu_num_shifted); | ||
458 | } | ||
459 | spin_unlock(&buffer_lock); | ||
460 | out: | ||
461 | spin_unlock_irqrestore(&cache_lock, flags); | ||
462 | } | ||
463 | |||
464 | |||
465 | int spu_sync_stop(void) | ||
466 | { | ||
467 | unsigned long flags = 0; | ||
468 | int ret = spu_switch_event_unregister(&spu_active); | ||
469 | if (ret) { | ||
470 | printk(KERN_ERR "SPU_PROF: " | ||
471 | "%s, line %d: spu_switch_event_unregister returned %d\n", | ||
472 | __FUNCTION__, __LINE__, ret); | ||
473 | goto out; | ||
474 | } | ||
475 | |||
476 | spin_lock_irqsave(&cache_lock, flags); | ||
477 | ret = release_cached_info(RELEASE_ALL); | ||
478 | spin_unlock_irqrestore(&cache_lock, flags); | ||
479 | out: | ||
480 | pr_debug("spu_sync_stop -- done.\n"); | ||
481 | return ret; | ||
482 | } | ||
483 | |||
484 | |||
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c new file mode 100644 index 000000000000..76ec1d16aef7 --- /dev/null +++ b/arch/powerpc/oprofile/cell/vma_map.c | |||
@@ -0,0 +1,287 @@ | |||
1 | /* | ||
2 | * Cell Broadband Engine OProfile Support | ||
3 | * | ||
4 | * (C) Copyright IBM Corporation 2006 | ||
5 | * | ||
6 | * Author: Maynard Johnson <maynardj@us.ibm.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | /* The code in this source file is responsible for generating | ||
15 | * vma-to-fileOffset maps for both overlay and non-overlay SPU | ||
16 | * applications. | ||
17 | */ | ||
18 | |||
19 | #include <linux/mm.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | #include <linux/elf.h> | ||
23 | #include "pr_util.h" | ||
24 | |||
25 | |||
26 | void vma_map_free(struct vma_to_fileoffset_map *map) | ||
27 | { | ||
28 | while (map) { | ||
29 | struct vma_to_fileoffset_map *next = map->next; | ||
30 | kfree(map); | ||
31 | map = next; | ||
32 | } | ||
33 | } | ||
34 | |||
35 | unsigned int | ||
36 | vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma, | ||
37 | const struct spu *aSpu, int *grd_val) | ||
38 | { | ||
39 | /* | ||
40 | * Default the offset to the physical address + a flag value. | ||
41 | * Addresses of dynamically generated code can't be found in the vma | ||
42 | * map. For those addresses the flagged value will be sent on to | ||
43 | * the user space tools so they can be reported rather than just | ||
44 | * thrown away. | ||
45 | */ | ||
46 | u32 offset = 0x10000000 + vma; | ||
47 | u32 ovly_grd; | ||
48 | |||
49 | for (; map; map = map->next) { | ||
50 | if (vma < map->vma || vma >= map->vma + map->size) | ||
51 | continue; | ||
52 | |||
53 | if (map->guard_ptr) { | ||
54 | ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr); | ||
55 | if (ovly_grd != map->guard_val) | ||
56 | continue; | ||
57 | *grd_val = ovly_grd; | ||
58 | } | ||
59 | offset = vma - map->vma + map->offset; | ||
60 | break; | ||
61 | } | ||
62 | |||
63 | return offset; | ||
64 | } | ||
65 | |||
66 | static struct vma_to_fileoffset_map * | ||
67 | vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma, | ||
68 | unsigned int size, unsigned int offset, unsigned int guard_ptr, | ||
69 | unsigned int guard_val) | ||
70 | { | ||
71 | struct vma_to_fileoffset_map *new = | ||
72 | kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL); | ||
73 | if (!new) { | ||
74 | printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n", | ||
75 | __FUNCTION__, __LINE__); | ||
76 | vma_map_free(map); | ||
77 | return NULL; | ||
78 | } | ||
79 | |||
80 | new->next = map; | ||
81 | new->vma = vma; | ||
82 | new->size = size; | ||
83 | new->offset = offset; | ||
84 | new->guard_ptr = guard_ptr; | ||
85 | new->guard_val = guard_val; | ||
86 | |||
87 | return new; | ||
88 | } | ||
89 | |||
90 | |||
91 | /* Parse SPE ELF header and generate a list of vma_maps. | ||
92 | * A pointer to the first vma_map in the generated list | ||
93 | * of vma_maps is returned. */ | ||
94 | struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu, | ||
95 | unsigned long spu_elf_start) | ||
96 | { | ||
97 | static const unsigned char expected[EI_PAD] = { | ||
98 | [EI_MAG0] = ELFMAG0, | ||
99 | [EI_MAG1] = ELFMAG1, | ||
100 | [EI_MAG2] = ELFMAG2, | ||
101 | [EI_MAG3] = ELFMAG3, | ||
102 | [EI_CLASS] = ELFCLASS32, | ||
103 | [EI_DATA] = ELFDATA2MSB, | ||
104 | [EI_VERSION] = EV_CURRENT, | ||
105 | [EI_OSABI] = ELFOSABI_NONE | ||
106 | }; | ||
107 | |||
108 | int grd_val; | ||
109 | struct vma_to_fileoffset_map *map = NULL; | ||
110 | struct spu_overlay_info ovly; | ||
111 | unsigned int overlay_tbl_offset = -1; | ||
112 | unsigned long phdr_start, shdr_start; | ||
113 | Elf32_Ehdr ehdr; | ||
114 | Elf32_Phdr phdr; | ||
115 | Elf32_Shdr shdr, shdr_str; | ||
116 | Elf32_Sym sym; | ||
117 | int i, j; | ||
118 | char name[32]; | ||
119 | |||
120 | unsigned int ovly_table_sym = 0; | ||
121 | unsigned int ovly_buf_table_sym = 0; | ||
122 | unsigned int ovly_table_end_sym = 0; | ||
123 | unsigned int ovly_buf_table_end_sym = 0; | ||
124 | unsigned long ovly_table; | ||
125 | unsigned int n_ovlys; | ||
126 | |||
127 | /* Get and validate ELF header. */ | ||
128 | |||
129 | if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr))) | ||
130 | goto fail; | ||
131 | |||
132 | if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) { | ||
133 | printk(KERN_ERR "SPU_PROF: " | ||
134 | "%s, line %d: Unexpected e_ident parsing SPU ELF\n", | ||
135 | __FUNCTION__, __LINE__); | ||
136 | goto fail; | ||
137 | } | ||
138 | if (ehdr.e_machine != EM_SPU) { | ||
139 | printk(KERN_ERR "SPU_PROF: " | ||
140 | "%s, line %d: Unexpected e_machine parsing SPU ELF\n", | ||
141 | __FUNCTION__, __LINE__); | ||
142 | goto fail; | ||
143 | } | ||
144 | if (ehdr.e_type != ET_EXEC) { | ||
145 | printk(KERN_ERR "SPU_PROF: " | ||
146 | "%s, line %d: Unexpected e_type parsing SPU ELF\n", | ||
147 | __FUNCTION__, __LINE__); | ||
148 | goto fail; | ||
149 | } | ||
150 | phdr_start = spu_elf_start + ehdr.e_phoff; | ||
151 | shdr_start = spu_elf_start + ehdr.e_shoff; | ||
152 | |||
153 | /* Traverse program headers. */ | ||
154 | for (i = 0; i < ehdr.e_phnum; i++) { | ||
155 | if (copy_from_user(&phdr, | ||
156 | (void *) (phdr_start + i * sizeof(phdr)), | ||
157 | sizeof(phdr))) | ||
158 | goto fail; | ||
159 | |||
160 | if (phdr.p_type != PT_LOAD) | ||
161 | continue; | ||
162 | if (phdr.p_flags & (1 << 27)) | ||
163 | continue; | ||
164 | |||
165 | map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz, | ||
166 | phdr.p_offset, 0, 0); | ||
167 | if (!map) | ||
168 | goto fail; | ||
169 | } | ||
170 | |||
171 | pr_debug("SPU_PROF: Created non-overlay maps\n"); | ||
172 | /* Traverse section table and search for overlay-related symbols. */ | ||
173 | for (i = 0; i < ehdr.e_shnum; i++) { | ||
174 | if (copy_from_user(&shdr, | ||
175 | (void *) (shdr_start + i * sizeof(shdr)), | ||
176 | sizeof(shdr))) | ||
177 | goto fail; | ||
178 | |||
179 | if (shdr.sh_type != SHT_SYMTAB) | ||
180 | continue; | ||
181 | if (shdr.sh_entsize != sizeof (sym)) | ||
182 | continue; | ||
183 | |||
184 | if (copy_from_user(&shdr_str, | ||
185 | (void *) (shdr_start + shdr.sh_link * | ||
186 | sizeof(shdr)), | ||
187 | sizeof(shdr))) | ||
188 | goto fail; | ||
189 | |||
190 | if (shdr_str.sh_type != SHT_STRTAB) | ||
191 | goto fail;; | ||
192 | |||
193 | for (j = 0; j < shdr.sh_size / sizeof (sym); j++) { | ||
194 | if (copy_from_user(&sym, (void *) (spu_elf_start + | ||
195 | shdr.sh_offset + j * | ||
196 | sizeof (sym)), | ||
197 | sizeof (sym))) | ||
198 | goto fail; | ||
199 | |||
200 | if (copy_from_user(name, (void *) | ||
201 | (spu_elf_start + shdr_str.sh_offset + | ||
202 | sym.st_name), | ||
203 | 20)) | ||
204 | goto fail; | ||
205 | |||
206 | if (memcmp(name, "_ovly_table", 12) == 0) | ||
207 | ovly_table_sym = sym.st_value; | ||
208 | if (memcmp(name, "_ovly_buf_table", 16) == 0) | ||
209 | ovly_buf_table_sym = sym.st_value; | ||
210 | if (memcmp(name, "_ovly_table_end", 16) == 0) | ||
211 | ovly_table_end_sym = sym.st_value; | ||
212 | if (memcmp(name, "_ovly_buf_table_end", 20) == 0) | ||
213 | ovly_buf_table_end_sym = sym.st_value; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | /* If we don't have overlays, we're done. */ | ||
218 | if (ovly_table_sym == 0 || ovly_buf_table_sym == 0 | ||
219 | || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) { | ||
220 | pr_debug("SPU_PROF: No overlay table found\n"); | ||
221 | goto out; | ||
222 | } else { | ||
223 | pr_debug("SPU_PROF: Overlay table found\n"); | ||
224 | } | ||
225 | |||
226 | /* The _ovly_table symbol represents a table with one entry | ||
227 | * per overlay section. The _ovly_buf_table symbol represents | ||
228 | * a table with one entry per overlay region. | ||
229 | * The struct spu_overlay_info gives the structure of the _ovly_table | ||
230 | * entries. The structure of _ovly_table_buf is simply one | ||
231 | * u32 word per entry. | ||
232 | */ | ||
233 | overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym, | ||
234 | aSpu, &grd_val); | ||
235 | if (overlay_tbl_offset < 0) { | ||
236 | printk(KERN_ERR "SPU_PROF: " | ||
237 | "%s, line %d: Error finding SPU overlay table\n", | ||
238 | __FUNCTION__, __LINE__); | ||
239 | goto fail; | ||
240 | } | ||
241 | ovly_table = spu_elf_start + overlay_tbl_offset; | ||
242 | |||
243 | n_ovlys = (ovly_table_end_sym - | ||
244 | ovly_table_sym) / sizeof (ovly); | ||
245 | |||
246 | /* Traverse overlay table. */ | ||
247 | for (i = 0; i < n_ovlys; i++) { | ||
248 | if (copy_from_user(&ovly, (void *) | ||
249 | (ovly_table + i * sizeof (ovly)), | ||
250 | sizeof (ovly))) | ||
251 | goto fail; | ||
252 | |||
253 | /* The ovly.vma/size/offset arguments are analogous to the same | ||
254 | * arguments used above for non-overlay maps. The final two | ||
255 | * args are referred to as the guard pointer and the guard | ||
256 | * value. | ||
257 | * The guard pointer is an entry in the _ovly_buf_table, | ||
258 | * computed using ovly.buf as the index into the table. Since | ||
259 | * ovly.buf values begin at '1' to reference the first (or 0th) | ||
260 | * entry in the _ovly_buf_table, the computation subtracts 1 | ||
261 | * from ovly.buf. | ||
262 | * The guard value is stored in the _ovly_buf_table entry and | ||
263 | * is an index (starting at 1) back to the _ovly_table entry | ||
264 | * that is pointing at this _ovly_buf_table entry. So, for | ||
265 | * example, for an overlay scenario with one overlay segment | ||
266 | * and two overlay sections: | ||
267 | * - Section 1 points to the first entry of the | ||
268 | * _ovly_buf_table, which contains a guard value | ||
269 | * of '1', referencing the first (index=0) entry of | ||
270 | * _ovly_table. | ||
271 | * - Section 2 points to the second entry of the | ||
272 | * _ovly_buf_table, which contains a guard value | ||
273 | * of '2', referencing the second (index=1) entry of | ||
274 | * _ovly_table. | ||
275 | */ | ||
276 | map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset, | ||
277 | ovly_buf_table_sym + (ovly.buf-1) * 4, i+1); | ||
278 | if (!map) | ||
279 | goto fail; | ||
280 | } | ||
281 | goto out; | ||
282 | |||
283 | fail: | ||
284 | map = NULL; | ||
285 | out: | ||
286 | return map; | ||
287 | } | ||
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index 1a7ef7e246d2..a28cce1d6c24 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c | |||
@@ -29,6 +29,8 @@ static struct op_powerpc_model *model; | |||
29 | static struct op_counter_config ctr[OP_MAX_COUNTER]; | 29 | static struct op_counter_config ctr[OP_MAX_COUNTER]; |
30 | static struct op_system_config sys; | 30 | static struct op_system_config sys; |
31 | 31 | ||
32 | static int op_per_cpu_rc; | ||
33 | |||
32 | static void op_handle_interrupt(struct pt_regs *regs) | 34 | static void op_handle_interrupt(struct pt_regs *regs) |
33 | { | 35 | { |
34 | model->handle_interrupt(regs, ctr); | 36 | model->handle_interrupt(regs, ctr); |
@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs) | |||
36 | 38 | ||
37 | static void op_powerpc_cpu_setup(void *dummy) | 39 | static void op_powerpc_cpu_setup(void *dummy) |
38 | { | 40 | { |
39 | model->cpu_setup(ctr); | 41 | int ret; |
42 | |||
43 | ret = model->cpu_setup(ctr); | ||
44 | |||
45 | if (ret != 0) | ||
46 | op_per_cpu_rc = ret; | ||
40 | } | 47 | } |
41 | 48 | ||
42 | static int op_powerpc_setup(void) | 49 | static int op_powerpc_setup(void) |
43 | { | 50 | { |
44 | int err; | 51 | int err; |
45 | 52 | ||
53 | op_per_cpu_rc = 0; | ||
54 | |||
46 | /* Grab the hardware */ | 55 | /* Grab the hardware */ |
47 | err = reserve_pmc_hardware(op_handle_interrupt); | 56 | err = reserve_pmc_hardware(op_handle_interrupt); |
48 | if (err) | 57 | if (err) |
49 | return err; | 58 | return err; |
50 | 59 | ||
51 | /* Pre-compute the values to stuff in the hardware registers. */ | 60 | /* Pre-compute the values to stuff in the hardware registers. */ |
52 | model->reg_setup(ctr, &sys, model->num_counters); | 61 | op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters); |
53 | 62 | ||
54 | /* Configure the registers on all cpus. */ | 63 | if (op_per_cpu_rc) |
64 | goto out; | ||
65 | |||
66 | /* Configure the registers on all cpus. If an error occurs on one | ||
67 | * of the cpus, op_per_cpu_rc will be set to the error */ | ||
55 | on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1); | 68 | on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1); |
56 | 69 | ||
57 | return 0; | 70 | out: if (op_per_cpu_rc) { |
71 | /* error on setup release the performance counter hardware */ | ||
72 | release_pmc_hardware(); | ||
73 | } | ||
74 | |||
75 | return op_per_cpu_rc; | ||
58 | } | 76 | } |
59 | 77 | ||
60 | static void op_powerpc_shutdown(void) | 78 | static void op_powerpc_shutdown(void) |
@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void) | |||
64 | 82 | ||
65 | static void op_powerpc_cpu_start(void *dummy) | 83 | static void op_powerpc_cpu_start(void *dummy) |
66 | { | 84 | { |
67 | model->start(ctr); | 85 | /* If any of the cpus have return an error, set the |
86 | * global flag to the error so it can be returned | ||
87 | * to the generic OProfile caller. | ||
88 | */ | ||
89 | int ret; | ||
90 | |||
91 | ret = model->start(ctr); | ||
92 | if (ret != 0) | ||
93 | op_per_cpu_rc = ret; | ||
68 | } | 94 | } |
69 | 95 | ||
70 | static int op_powerpc_start(void) | 96 | static int op_powerpc_start(void) |
71 | { | 97 | { |
98 | op_per_cpu_rc = 0; | ||
99 | |||
72 | if (model->global_start) | 100 | if (model->global_start) |
73 | model->global_start(ctr); | 101 | return model->global_start(ctr); |
74 | if (model->start) | 102 | if (model->start) { |
75 | on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); | 103 | on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); |
76 | return 0; | 104 | return op_per_cpu_rc; |
105 | } | ||
106 | return -EIO; /* No start function is defined for this | ||
107 | power architecture */ | ||
77 | } | 108 | } |
78 | 109 | ||
79 | static inline void op_powerpc_cpu_stop(void *dummy) | 110 | static inline void op_powerpc_cpu_stop(void *dummy) |
@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) | |||
147 | 178 | ||
148 | switch (cur_cpu_spec->oprofile_type) { | 179 | switch (cur_cpu_spec->oprofile_type) { |
149 | #ifdef CONFIG_PPC64 | 180 | #ifdef CONFIG_PPC64 |
150 | #ifdef CONFIG_PPC_CELL_NATIVE | 181 | #ifdef CONFIG_OPROFILE_CELL |
151 | case PPC_OPROFILE_CELL: | 182 | case PPC_OPROFILE_CELL: |
152 | if (firmware_has_feature(FW_FEATURE_LPAR)) | 183 | if (firmware_has_feature(FW_FEATURE_LPAR)) |
153 | return -ENODEV; | 184 | return -ENODEV; |
154 | model = &op_model_cell; | 185 | model = &op_model_cell; |
186 | ops->sync_start = model->sync_start; | ||
187 | ops->sync_stop = model->sync_stop; | ||
155 | break; | 188 | break; |
156 | #endif | 189 | #endif |
157 | case PPC_OPROFILE_RS64: | 190 | case PPC_OPROFILE_RS64: |
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c index 5d1bbaf35ccb..cc599eb8768b 100644 --- a/arch/powerpc/oprofile/op_model_7450.c +++ b/arch/powerpc/oprofile/op_model_7450.c | |||
@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void) | |||
81 | 81 | ||
82 | /* Configures the counters on this CPU based on the global | 82 | /* Configures the counters on this CPU based on the global |
83 | * settings */ | 83 | * settings */ |
84 | static void fsl7450_cpu_setup(struct op_counter_config *ctr) | 84 | static int fsl7450_cpu_setup(struct op_counter_config *ctr) |
85 | { | 85 | { |
86 | /* freeze all counters */ | 86 | /* freeze all counters */ |
87 | pmc_stop_ctrs(); | 87 | pmc_stop_ctrs(); |
@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr) | |||
89 | mtspr(SPRN_MMCR0, mmcr0_val); | 89 | mtspr(SPRN_MMCR0, mmcr0_val); |
90 | mtspr(SPRN_MMCR1, mmcr1_val); | 90 | mtspr(SPRN_MMCR1, mmcr1_val); |
91 | mtspr(SPRN_MMCR2, mmcr2_val); | 91 | mtspr(SPRN_MMCR2, mmcr2_val); |
92 | |||
93 | return 0; | ||
92 | } | 94 | } |
93 | 95 | ||
94 | #define NUM_CTRS 6 | 96 | #define NUM_CTRS 6 |
95 | 97 | ||
96 | /* Configures the global settings for the countes on all CPUs. */ | 98 | /* Configures the global settings for the countes on all CPUs. */ |
97 | static void fsl7450_reg_setup(struct op_counter_config *ctr, | 99 | static int fsl7450_reg_setup(struct op_counter_config *ctr, |
98 | struct op_system_config *sys, | 100 | struct op_system_config *sys, |
99 | int num_ctrs) | 101 | int num_ctrs) |
100 | { | 102 | { |
@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr, | |||
126 | | mmcr1_event6(ctr[5].event); | 128 | | mmcr1_event6(ctr[5].event); |
127 | 129 | ||
128 | mmcr2_val = 0; | 130 | mmcr2_val = 0; |
131 | |||
132 | return 0; | ||
129 | } | 133 | } |
130 | 134 | ||
131 | /* Sets the counters on this CPU to the chosen values, and starts them */ | 135 | /* Sets the counters on this CPU to the chosen values, and starts them */ |
132 | static void fsl7450_start(struct op_counter_config *ctr) | 136 | static int fsl7450_start(struct op_counter_config *ctr) |
133 | { | 137 | { |
134 | int i; | 138 | int i; |
135 | 139 | ||
@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr) | |||
148 | pmc_start_ctrs(); | 152 | pmc_start_ctrs(); |
149 | 153 | ||
150 | oprofile_running = 1; | 154 | oprofile_running = 1; |
155 | |||
156 | return 0; | ||
151 | } | 157 | } |
152 | 158 | ||
153 | /* Stop the counters on this CPU */ | 159 | /* Stop the counters on this CPU */ |
@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs, | |||
193 | /* The freeze bit was set by the interrupt. */ | 199 | /* The freeze bit was set by the interrupt. */ |
194 | /* Clear the freeze bit, and reenable the interrupt. | 200 | /* Clear the freeze bit, and reenable the interrupt. |
195 | * The counters won't actually start until the rfi clears | 201 | * The counters won't actually start until the rfi clears |
196 | * the PMM bit */ | 202 | * the PM/M bit */ |
197 | pmc_start_ctrs(); | 203 | pmc_start_ctrs(); |
198 | } | 204 | } |
199 | 205 | ||
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index c29293befba9..d928b54f3a0f 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c | |||
@@ -5,8 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Author: David Erb (djerb@us.ibm.com) | 6 | * Author: David Erb (djerb@us.ibm.com) |
7 | * Modifications: | 7 | * Modifications: |
8 | * Carl Love <carll@us.ibm.com> | 8 | * Carl Love <carll@us.ibm.com> |
9 | * Maynard Johnson <maynardj@us.ibm.com> | 9 | * Maynard Johnson <maynardj@us.ibm.com> |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
@@ -38,12 +38,25 @@ | |||
38 | 38 | ||
39 | #include "../platforms/cell/interrupt.h" | 39 | #include "../platforms/cell/interrupt.h" |
40 | #include "../platforms/cell/cbe_regs.h" | 40 | #include "../platforms/cell/cbe_regs.h" |
41 | #include "cell/pr_util.h" | ||
42 | |||
43 | static void cell_global_stop_spu(void); | ||
44 | |||
45 | /* | ||
46 | * spu_cycle_reset is the number of cycles between samples. | ||
47 | * This variable is used for SPU profiling and should ONLY be set | ||
48 | * at the beginning of cell_reg_setup; otherwise, it's read-only. | ||
49 | */ | ||
50 | static unsigned int spu_cycle_reset; | ||
51 | |||
52 | #define NUM_SPUS_PER_NODE 8 | ||
53 | #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ | ||
41 | 54 | ||
42 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ | 55 | #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ |
43 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying | 56 | #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying |
44 | * PPU_CYCLES event | 57 | * PPU_CYCLES event |
45 | */ | 58 | */ |
46 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ | 59 | #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ |
47 | 60 | ||
48 | #define NUM_THREADS 2 /* number of physical threads in | 61 | #define NUM_THREADS 2 /* number of physical threads in |
49 | * physical processor | 62 | * physical processor |
@@ -51,6 +64,7 @@ | |||
51 | #define NUM_TRACE_BUS_WORDS 4 | 64 | #define NUM_TRACE_BUS_WORDS 4 |
52 | #define NUM_INPUT_BUS_WORDS 2 | 65 | #define NUM_INPUT_BUS_WORDS 2 |
53 | 66 | ||
67 | #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ | ||
54 | 68 | ||
55 | struct pmc_cntrl_data { | 69 | struct pmc_cntrl_data { |
56 | unsigned long vcntr; | 70 | unsigned long vcntr; |
@@ -62,11 +76,10 @@ struct pmc_cntrl_data { | |||
62 | /* | 76 | /* |
63 | * ibm,cbe-perftools rtas parameters | 77 | * ibm,cbe-perftools rtas parameters |
64 | */ | 78 | */ |
65 | |||
66 | struct pm_signal { | 79 | struct pm_signal { |
67 | u16 cpu; /* Processor to modify */ | 80 | u16 cpu; /* Processor to modify */ |
68 | u16 sub_unit; /* hw subunit this applies to (if applicable) */ | 81 | u16 sub_unit; /* hw subunit this applies to (if applicable)*/ |
69 | short int signal_group; /* Signal Group to Enable/Disable */ | 82 | short int signal_group; /* Signal Group to Enable/Disable */ |
70 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event | 83 | u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event |
71 | * Bus Word(s) (bitmask) | 84 | * Bus Word(s) (bitmask) |
72 | */ | 85 | */ |
@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); | |||
112 | 125 | ||
113 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; | 126 | static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; |
114 | 127 | ||
115 | /* Interpetation of hdw_thread: | 128 | /* |
129 | * The CELL profiling code makes rtas calls to setup the debug bus to | ||
130 | * route the performance signals. Additionally, SPU profiling requires | ||
131 | * a second rtas call to setup the hardware to capture the SPU PCs. | ||
132 | * The EIO error value is returned if the token lookups or the rtas | ||
133 | * call fail. The EIO error number is the best choice of the existing | ||
134 | * error numbers. The probability of rtas related error is very low. But | ||
136 | * by returning EIO and printing additional information to dmesg the user | ||
136 | * will know that OProfile did not start and dmesg will tell them why. | ||
137 | * OProfile does not support returning errors on Stop. Not a huge issue | ||
138 | * since failure to reset the debug bus or stop the SPU PC collection is | ||
140 | * not a fatal issue.  Chances are if the Stop failed, Start doesn't work | ||
140 | * either. | ||
141 | */ | ||
142 | |||
143 | /* | ||
144 | * Interpretation of hdw_thread: | ||
116 | * 0 - even virtual cpus 0, 2, 4,... | 145 | * 0 - even virtual cpus 0, 2, 4,... |
117 | * 1 - odd virtual cpus 1, 3, 5, ... | 146 | * 1 - odd virtual cpus 1, 3, 5, ... |
147 | * | ||
148 | * FIXME: this is strictly wrong, we need to clean this up in a number | ||
149 | * of places. It works for now. -arnd | ||
118 | */ | 150 | */ |
119 | static u32 hdw_thread; | 151 | static u32 hdw_thread; |
120 | 152 | ||
121 | static u32 virt_cntr_inter_mask; | 153 | static u32 virt_cntr_inter_mask; |
122 | static struct timer_list timer_virt_cntr; | 154 | static struct timer_list timer_virt_cntr; |
123 | 155 | ||
124 | /* pm_signal needs to be global since it is initialized in | 156 | /* |
157 | * pm_signal needs to be global since it is initialized in | ||
125 | * cell_reg_setup at the time when the necessary information | 158 | * cell_reg_setup at the time when the necessary information |
126 | * is available. | 159 | * is available. |
127 | */ | 160 | */ |
128 | static struct pm_signal pm_signal[NR_PHYS_CTRS]; | 161 | static struct pm_signal pm_signal[NR_PHYS_CTRS]; |
129 | static int pm_rtas_token; | 162 | static int pm_rtas_token; /* token for debug bus setup call */ |
163 | static int spu_rtas_token; /* token for SPU cycle profiling */ | ||
130 | 164 | ||
131 | static u32 reset_value[NR_PHYS_CTRS]; | 165 | static u32 reset_value[NR_PHYS_CTRS]; |
132 | static int num_counters; | 166 | static int num_counters; |
@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru, | |||
147 | { | 181 | { |
148 | u64 paddr = __pa(address); | 182 | u64 paddr = __pa(address); |
149 | 183 | ||
150 | return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, | 184 | return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, |
151 | paddr >> 32, paddr & 0xffffffff, length); | 185 | passthru, paddr >> 32, paddr & 0xffffffff, length); |
152 | } | 186 | } |
153 | 187 | ||
154 | static void pm_rtas_reset_signals(u32 node) | 188 | static void pm_rtas_reset_signals(u32 node) |
@@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node) | |||
156 | int ret; | 190 | int ret; |
157 | struct pm_signal pm_signal_local; | 191 | struct pm_signal pm_signal_local; |
158 | 192 | ||
159 | /* The debug bus is being set to the passthru disable state. | 193 | /* |
160 | * However, the FW still expects atleast one legal signal routing | 194 | * The debug bus is being set to the passthru disable state. |
161 | * entry or it will return an error on the arguments. If we don't | 195 | * However, the FW still expects atleast one legal signal routing |
162 | * supply a valid entry, we must ignore all return values. Ignoring | 196 | * entry or it will return an error on the arguments. If we don't |
163 | * all return values means we might miss an error we should be | 197 | * supply a valid entry, we must ignore all return values. Ignoring |
164 | * concerned about. | 198 | * all return values means we might miss an error we should be |
199 | * concerned about. | ||
165 | */ | 200 | */ |
166 | 201 | ||
167 | /* fw expects physical cpu #. */ | 202 | /* fw expects physical cpu #. */ |
@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node) | |||
175 | &pm_signal_local, | 210 | &pm_signal_local, |
176 | sizeof(struct pm_signal)); | 211 | sizeof(struct pm_signal)); |
177 | 212 | ||
178 | if (ret) | 213 | if (unlikely(ret)) |
214 | /* | ||
215 | * Not a fatal error. For Oprofile stop, the oprofile | ||
216 | * functions do not support returning an error for | ||
217 | * failure to stop OProfile. | ||
218 | */ | ||
179 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 219 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
180 | __FUNCTION__, ret); | 220 | __FUNCTION__, ret); |
181 | } | 221 | } |
182 | 222 | ||
183 | static void pm_rtas_activate_signals(u32 node, u32 count) | 223 | static int pm_rtas_activate_signals(u32 node, u32 count) |
184 | { | 224 | { |
185 | int ret; | 225 | int ret; |
186 | int i, j; | 226 | int i, j; |
187 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | 227 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; |
188 | 228 | ||
189 | /* There is no debug setup required for the cycles event. | 229 | /* |
230 | * There is no debug setup required for the cycles event. | ||
190 | * Note that only events in the same group can be used. | 231 | * Note that only events in the same group can be used. |
191 | * Otherwise, there will be conflicts in correctly routing | 232 | * Otherwise, there will be conflicts in correctly routing |
192 | * the signals on the debug bus. It is the responsiblity | 233 | * the signals on the debug bus. It is the responsiblity |
@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count) | |||
213 | pm_signal_local, | 254 | pm_signal_local, |
214 | i * sizeof(struct pm_signal)); | 255 | i * sizeof(struct pm_signal)); |
215 | 256 | ||
216 | if (ret) | 257 | if (unlikely(ret)) { |
217 | printk(KERN_WARNING "%s: rtas returned: %d\n", | 258 | printk(KERN_WARNING "%s: rtas returned: %d\n", |
218 | __FUNCTION__, ret); | 259 | __FUNCTION__, ret); |
260 | return -EIO; | ||
261 | } | ||
219 | } | 262 | } |
263 | |||
264 | return 0; | ||
220 | } | 265 | } |
221 | 266 | ||
222 | /* | 267 | /* |
@@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
260 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); | 305 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); |
261 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); | 306 | pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); |
262 | 307 | ||
263 | /* Some of the islands signal selection is based on 64 bit words. | 308 | /* |
309 | * Some of the islands signal selection is based on 64 bit words. | ||
264 | * The debug bus words are 32 bits, the input words to the performance | 310 | * The debug bus words are 32 bits, the input words to the performance |
265 | * counters are defined as 32 bits. Need to convert the 64 bit island | 311 | * counters are defined as 32 bits. Need to convert the 64 bit island |
266 | * specification to the appropriate 32 input bit and bus word for the | 312 | * specification to the appropriate 32 input bit and bus word for the |
267 | * performance counter event selection. See the CELL Performance | 313 | * performance counter event selection. See the CELL Performance |
268 | * monitoring signals manual and the Perf cntr hardware descriptions | 314 | * monitoring signals manual and the Perf cntr hardware descriptions |
269 | * for the details. | 315 | * for the details. |
270 | */ | 316 | */ |
@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask) | |||
298 | input_bus[j] = i; | 344 | input_bus[j] = i; |
299 | pm_regs.group_control |= | 345 | pm_regs.group_control |= |
300 | (i << (31 - i)); | 346 | (i << (31 - i)); |
347 | |||
301 | break; | 348 | break; |
302 | } | 349 | } |
303 | } | 350 | } |
@@ -309,7 +356,8 @@ out: | |||
309 | 356 | ||
310 | static void write_pm_cntrl(int cpu) | 357 | static void write_pm_cntrl(int cpu) |
311 | { | 358 | { |
312 | /* Oprofile will use 32 bit counters, set bits 7:10 to 0 | 359 | /* |
360 | * Oprofile will use 32 bit counters, set bits 7:10 to 0 | ||
313 | * pmregs.pm_cntrl is a global | 361 | * pmregs.pm_cntrl is a global |
314 | */ | 362 | */ |
315 | 363 | ||
@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu) | |||
326 | if (pm_regs.pm_cntrl.freeze == 1) | 374 | if (pm_regs.pm_cntrl.freeze == 1) |
327 | val |= CBE_PM_FREEZE_ALL_CTRS; | 375 | val |= CBE_PM_FREEZE_ALL_CTRS; |
328 | 376 | ||
329 | /* Routine set_count_mode must be called previously to set | 377 | /* |
378 | * Routine set_count_mode must be called previously to set | ||
330 | * the count mode based on the user selection of user and kernel. | 379 | * the count mode based on the user selection of user and kernel. |
331 | */ | 380 | */ |
332 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); | 381 | val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); |
@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu) | |||
336 | static inline void | 385 | static inline void |
337 | set_count_mode(u32 kernel, u32 user) | 386 | set_count_mode(u32 kernel, u32 user) |
338 | { | 387 | { |
339 | /* The user must specify user and kernel if they want them. If | 388 | /* |
389 | * The user must specify user and kernel if they want them. If | ||
340 | * neither is specified, OProfile will count in hypervisor mode. | 390 | * neither is specified, OProfile will count in hypervisor mode. |
341 | * pm_regs.pm_cntrl is a global | 391 | * pm_regs.pm_cntrl is a global |
342 | */ | 392 | */ |
@@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | |||
364 | 414 | ||
365 | /* | 415 | /* |
366 | * Oprofile is expected to collect data on all CPUs simultaneously. | 416 | * Oprofile is expected to collect data on all CPUs simultaneously. |
367 | * However, there is one set of performance counters per node. There are | 417 | * However, there is one set of performance counters per node. There are |
368 | * two hardware threads or virtual CPUs on each node. Hence, OProfile must | 418 | * two hardware threads or virtual CPUs on each node. Hence, OProfile must |
369 | * multiplex in time the performance counter collection on the two virtual | 419 | * multiplex in time the performance counter collection on the two virtual |
370 | * CPUs. The multiplexing of the performance counters is done by this | 420 | * CPUs. The multiplexing of the performance counters is done by this |
@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) | |||
377 | * pair of per-cpu arrays is used for storing the previous and next | 427 | * pair of per-cpu arrays is used for storing the previous and next |
378 | * pmc values for a given node. | 428 | * pmc values for a given node. |
379 | * NOTE: We use the per-cpu variable to improve cache performance. | 429 | * NOTE: We use the per-cpu variable to improve cache performance. |
430 | * | ||
431 | * This routine will alternate loading the virtual counters for | ||
432 | * virtual CPUs | ||
380 | */ | 433 | */ |
381 | static void cell_virtual_cntr(unsigned long data) | 434 | static void cell_virtual_cntr(unsigned long data) |
382 | { | 435 | { |
383 | /* This routine will alternate loading the virtual counters for | ||
384 | * virtual CPUs | ||
385 | */ | ||
386 | int i, prev_hdw_thread, next_hdw_thread; | 436 | int i, prev_hdw_thread, next_hdw_thread; |
387 | u32 cpu; | 437 | u32 cpu; |
388 | unsigned long flags; | 438 | unsigned long flags; |
389 | 439 | ||
390 | /* Make sure that the interrupt_hander and | 440 | /* |
391 | * the virt counter are not both playing with | 441 | * Make sure that the interrupt_hander and the virt counter are |
392 | * the counters on the same node. | 442 | * not both playing with the counters on the same node. |
393 | */ | 443 | */ |
394 | 444 | ||
395 | spin_lock_irqsave(&virt_cntr_lock, flags); | 445 | spin_lock_irqsave(&virt_cntr_lock, flags); |
@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data) | |||
400 | hdw_thread = 1 ^ hdw_thread; | 450 | hdw_thread = 1 ^ hdw_thread; |
401 | next_hdw_thread = hdw_thread; | 451 | next_hdw_thread = hdw_thread; |
402 | 452 | ||
403 | for (i = 0; i < num_counters; i++) | 453 | /* |
404 | /* There are some per thread events. Must do the | 454 | * There are some per thread events. Must do the |
405 | * set event, for the thread that is being started | 455 | * set event, for the thread that is being started |
406 | */ | 456 | */ |
457 | for (i = 0; i < num_counters; i++) | ||
407 | set_pm_event(i, | 458 | set_pm_event(i, |
408 | pmc_cntrl[next_hdw_thread][i].evnts, | 459 | pmc_cntrl[next_hdw_thread][i].evnts, |
409 | pmc_cntrl[next_hdw_thread][i].masks); | 460 | pmc_cntrl[next_hdw_thread][i].masks); |
410 | 461 | ||
411 | /* The following is done only once per each node, but | 462 | /* |
463 | * The following is done only once per each node, but | ||
412 | * we need cpu #, not node #, to pass to the cbe_xxx functions. | 464 | * we need cpu #, not node #, to pass to the cbe_xxx functions. |
413 | */ | 465 | */ |
414 | for_each_online_cpu(cpu) { | 466 | for_each_online_cpu(cpu) { |
415 | if (cbe_get_hw_thread_id(cpu)) | 467 | if (cbe_get_hw_thread_id(cpu)) |
416 | continue; | 468 | continue; |
417 | 469 | ||
418 | /* stop counters, save counter values, restore counts | 470 | /* |
471 | * stop counters, save counter values, restore counts | ||
419 | * for previous thread | 472 | * for previous thread |
420 | */ | 473 | */ |
421 | cbe_disable_pm(cpu); | 474 | cbe_disable_pm(cpu); |
@@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data) | |||
428 | == 0xFFFFFFFF) | 481 | == 0xFFFFFFFF) |
429 | /* If the cntr value is 0xffffffff, we must | 482 | /* If the cntr value is 0xffffffff, we must |
430 | * reset that to 0xfffffff0 when the current | 483 | * reset that to 0xfffffff0 when the current |
431 | * thread is restarted. This will generate a | 484 | * thread is restarted. This will generate a |
432 | * new interrupt and make sure that we never | 485 | * new interrupt and make sure that we never |
433 | * restore the counters to the max value. If | 486 | * restore the counters to the max value. If |
434 | * the counters were restored to the max value, | 487 | * the counters were restored to the max value, |
@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data) | |||
444 | next_hdw_thread)[i]); | 497 | next_hdw_thread)[i]); |
445 | } | 498 | } |
446 | 499 | ||
447 | /* Switch to the other thread. Change the interrupt | 500 | /* |
501 | * Switch to the other thread. Change the interrupt | ||
448 | * and control regs to be scheduled on the CPU | 502 | * and control regs to be scheduled on the CPU |
449 | * corresponding to the thread to execute. | 503 | * corresponding to the thread to execute. |
450 | */ | 504 | */ |
451 | for (i = 0; i < num_counters; i++) { | 505 | for (i = 0; i < num_counters; i++) { |
452 | if (pmc_cntrl[next_hdw_thread][i].enabled) { | 506 | if (pmc_cntrl[next_hdw_thread][i].enabled) { |
453 | /* There are some per thread events. | 507 | /* |
508 | * There are some per thread events. | ||
454 | * Must do the set event, enable_cntr | 509 | * Must do the set event, enable_cntr |
455 | * for each cpu. | 510 | * for each cpu. |
456 | */ | 511 | */ |
@@ -482,17 +537,42 @@ static void start_virt_cntrs(void) | |||
482 | } | 537 | } |
483 | 538 | ||
484 | /* This function is called once for all cpus combined */ | 539 | /* This function is called once for all cpus combined */ |
485 | static void | 540 | static int cell_reg_setup(struct op_counter_config *ctr, |
486 | cell_reg_setup(struct op_counter_config *ctr, | 541 | struct op_system_config *sys, int num_ctrs) |
487 | struct op_system_config *sys, int num_ctrs) | ||
488 | { | 542 | { |
489 | int i, j, cpu; | 543 | int i, j, cpu; |
544 | spu_cycle_reset = 0; | ||
545 | |||
546 | if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { | ||
547 | spu_cycle_reset = ctr[0].count; | ||
548 | |||
549 | /* | ||
550 | * Each node will need to make the rtas call to start | ||
551 | * and stop SPU profiling. Get the token once and store it. | ||
552 | */ | ||
553 | spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); | ||
554 | |||
555 | if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { | ||
556 | printk(KERN_ERR | ||
557 | "%s: rtas token ibm,cbe-spu-perftools unknown\n", | ||
558 | __FUNCTION__); | ||
559 | return -EIO; | ||
560 | } | ||
561 | } | ||
490 | 562 | ||
491 | pm_rtas_token = rtas_token("ibm,cbe-perftools"); | 563 | pm_rtas_token = rtas_token("ibm,cbe-perftools"); |
492 | if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { | 564 | |
493 | printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", | 565 | /* |
566 | * For all events except PPU CYCLEs, each node will need to make | ||
567 | * the rtas cbe-perftools call to setup and reset the debug bus. | ||
568 | * Make the token lookup call once and store it in the global | ||
569 | * variable pm_rtas_token. | ||
570 | */ | ||
571 | if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { | ||
572 | printk(KERN_ERR | ||
573 | "%s: rtas token ibm,cbe-perftools unknown\n", | ||
494 | __FUNCTION__); | 574 | __FUNCTION__); |
495 | goto out; | 575 | return -EIO; |
496 | } | 576 | } |
497 | 577 | ||
498 | num_counters = num_ctrs; | 578 | num_counters = num_ctrs; |
@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
520 | per_cpu(pmc_values, j)[i] = 0; | 600 | per_cpu(pmc_values, j)[i] = 0; |
521 | } | 601 | } |
522 | 602 | ||
523 | /* Setup the thread 1 events, map the thread 0 event to the | 603 | /* |
604 | * Setup the thread 1 events, map the thread 0 event to the | ||
524 | * equivalent thread 1 event. | 605 | * equivalent thread 1 event. |
525 | */ | 606 | */ |
526 | for (i = 0; i < num_ctrs; ++i) { | 607 | for (i = 0; i < num_ctrs; ++i) { |
@@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
544 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) | 625 | for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) |
545 | input_bus[i] = 0xff; | 626 | input_bus[i] = 0xff; |
546 | 627 | ||
547 | /* Our counters count up, and "count" refers to | 628 | /* |
629 | * Our counters count up, and "count" refers to | ||
548 | * how much before the next interrupt, and we interrupt | 630 | * how much before the next interrupt, and we interrupt |
549 | * on overflow. So we calculate the starting value | 631 | * on overflow. So we calculate the starting value |
550 | * which will give us "count" until overflow. | 632 | * which will give us "count" until overflow. |
551 | * Then we set the events on the enabled counters. | 633 | * Then we set the events on the enabled counters. |
552 | */ | 634 | */ |
@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr, | |||
569 | for (i = 0; i < num_counters; ++i) { | 651 | for (i = 0; i < num_counters; ++i) { |
570 | per_cpu(pmc_values, cpu)[i] = reset_value[i]; | 652 | per_cpu(pmc_values, cpu)[i] = reset_value[i]; |
571 | } | 653 | } |
572 | out: | 654 | |
573 | ; | 655 | return 0; |
574 | } | 656 | } |
575 | 657 | ||
658 | |||
659 | |||
576 | /* This function is called once for each cpu */ | 660 | /* This function is called once for each cpu */ |
577 | static void cell_cpu_setup(struct op_counter_config *cntr) | 661 | static int cell_cpu_setup(struct op_counter_config *cntr) |
578 | { | 662 | { |
579 | u32 cpu = smp_processor_id(); | 663 | u32 cpu = smp_processor_id(); |
580 | u32 num_enabled = 0; | 664 | u32 num_enabled = 0; |
581 | int i; | 665 | int i; |
582 | 666 | ||
667 | if (spu_cycle_reset) | ||
668 | return 0; | ||
669 | |||
583 | /* There is one performance monitor per processor chip (i.e. node), | 670 | /* There is one performance monitor per processor chip (i.e. node), |
584 | * so we only need to perform this function once per node. | 671 | * so we only need to perform this function once per node. |
585 | */ | 672 | */ |
586 | if (cbe_get_hw_thread_id(cpu)) | 673 | if (cbe_get_hw_thread_id(cpu)) |
587 | goto out; | 674 | return 0; |
588 | |||
589 | if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { | ||
590 | printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", | ||
591 | __FUNCTION__); | ||
592 | goto out; | ||
593 | } | ||
594 | 675 | ||
595 | /* Stop all counters */ | 676 | /* Stop all counters */ |
596 | cbe_disable_pm(cpu); | 677 | cbe_disable_pm(cpu); |
@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr) | |||
609 | } | 690 | } |
610 | } | 691 | } |
611 | 692 | ||
612 | pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); | 693 | /* |
694 | * The pm_rtas_activate_signals will return -EIO if the FW | ||
695 | * call failed. | ||
696 | */ | ||
697 | return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); | ||
698 | } | ||
699 | |||
700 | #define ENTRIES 303 | ||
701 | #define MAXLFSR 0xFFFFFF | ||
702 | |||
703 | /* precomputed table of 24 bit LFSR values */ | ||
704 | static int initial_lfsr[] = { | ||
705 | 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, | ||
706 | 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, | ||
707 | 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, | ||
708 | 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, | ||
709 | 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, | ||
710 | 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, | ||
711 | 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, | ||
712 | 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, | ||
713 | 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017, | ||
714 | 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756, | ||
715 | 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558, | ||
716 | 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401, | ||
717 | 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720, | ||
718 | 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042, | ||
719 | 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955, | ||
720 | 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934, | ||
721 | 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783, | ||
722 | 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278, | ||
723 | 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051, | ||
724 | 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741, | ||
725 | 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972, | ||
726 | 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302, | ||
727 | 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384, | ||
728 | 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469, | ||
729 | 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697, | ||
730 | 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398, | ||
731 | 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140, | ||
732 | 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214, | ||
733 | 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386, | ||
734 | 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087, | ||
735 | 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130, | ||
736 | 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300, | ||
737 | 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475, | ||
738 | 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950, | ||
739 | 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003, | ||
740 | 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375, | ||
741 | 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426, | ||
742 | 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607 | ||
743 | }; | ||
744 | |||
745 | /* | ||
746 | * The hardware uses an LFSR counting sequence to determine when to capture | ||
747 | * the SPU PCs. An LFSR sequence is like a pseudo random number sequence | ||
748 | * where each number occurs once in the sequence but the sequence is not in | ||
749 | * numerical order. The SPU PC capture is done when the LFSR sequence reaches | ||
750 | * the last value in the sequence. Hence the user specified value N | ||
751 | * corresponds to the LFSR number that is N from the end of the sequence. | ||
752 | * | ||
753 | * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit | ||
754 | * LFSR sequence is broken into four ranges. The spacing of the precomputed | ||
755 | * values is adjusted in each range so the error between the user specified | ||
756 | * number (N) of events between samples and the actual number of events based | ||
757 | * on the precomputed value will be less than about 6.2%. Note, if the user | ||
758 | * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. | ||
759 | * This is to prevent the loss of samples because the trace buffer is full. | ||
760 | * | ||
761 | * User specified N Step between Index in | ||
762 | * precomputed values precomputed | ||
763 | * table | ||
764 | * 0 to 2^16-1 ---- 0 | ||
765 | * 2^16 to 2^16+2^19-1 2^12 1 to 128 | ||
766 | * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256 | ||
767 | * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302 | ||
768 | * | ||
769 | * | ||
770 | * For example, the LFSR values in the second range are computed for 2^16, | ||
771 | * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices | ||
772 | * 1, 2,..., 127, 128. | ||
773 | * | ||
774 | * The 24 bit LFSR value for the nth number in the sequence can be | ||
775 | * calculated using the following code: | ||
776 | * | ||
777 | * #define size 24 | ||
778 | * int calculate_lfsr(int n) | ||
779 | * { | ||
780 | * int i; | ||
781 | * unsigned int newlfsr0; | ||
782 | * unsigned int lfsr = 0xFFFFFF; | ||
783 | * unsigned int howmany = n; | ||
784 | * | ||
785 | * for (i = 2; i < howmany + 2; i++) { | ||
786 | * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^ | ||
787 | * ((lfsr >> (size - 1 - 1)) & 1) ^ | ||
788 | * (((lfsr >> (size - 1 - 6)) & 1) ^ | ||
789 | * ((lfsr >> (size - 1 - 23)) & 1))); | ||
790 | * | ||
791 | * lfsr >>= 1; | ||
792 | * lfsr = lfsr | (newlfsr0 << (size - 1)); | ||
793 | * } | ||
794 | * return lfsr; | ||
795 | * } | ||
796 | */ | ||
797 | |||
798 | #define V2_16 (0x1 << 16) | ||
799 | #define V2_19 (0x1 << 19) | ||
800 | #define V2_22 (0x1 << 22) | ||
801 | |||
802 | static int calculate_lfsr(int n) | ||
803 | { | ||
804 | /* | ||
805 | * The ranges and steps are in powers of 2 so the calculations | ||
806 | * can be done using shifts rather than divide. | ||
807 | */ | ||
808 | int index; | ||
809 | |||
810 | if ((n >> 16) == 0) | ||
811 | index = 0; | ||
812 | else if (((n - V2_16) >> 19) == 0) | ||
813 | index = ((n - V2_16) >> 12) + 1; | ||
814 | else if (((n - V2_16 - V2_19) >> 22) == 0) | ||
815 | index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128; | ||
816 | else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) | ||
817 | index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256; | ||
818 | else | ||
819 | index = ENTRIES-1; | ||
820 | |||
821 | /* make sure index is valid */ | ||
822 | if ((index > ENTRIES) || (index < 0)) | ||
823 | index = ENTRIES-1; | ||
824 | |||
825 | return initial_lfsr[index]; | ||
826 | } | ||
827 | |||
828 | static int pm_rtas_activate_spu_profiling(u32 node) | ||
829 | { | ||
830 | int ret, i; | ||
831 | struct pm_signal pm_signal_local[NR_PHYS_CTRS]; | ||
832 | |||
833 | /* | ||
834 | * Set up the rtas call to configure the debug bus to | ||
835 | * route the SPU PCs. Setup the pm_signal for each SPU | ||
836 | */ | ||
837 | for (i = 0; i < NUM_SPUS_PER_NODE; i++) { | ||
838 | pm_signal_local[i].cpu = node; | ||
839 | pm_signal_local[i].signal_group = 41; | ||
840 | /* spu i on word (i/2) */ | ||
841 | pm_signal_local[i].bus_word = 1 << i / 2; | ||
842 | /* spu i */ | ||
843 | pm_signal_local[i].sub_unit = i; | ||
844 | pm_signal_local[i].bit = 63; | ||
845 | } | ||
846 | |||
847 | ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, | ||
848 | PASSTHRU_ENABLE, pm_signal_local, | ||
849 | (NUM_SPUS_PER_NODE | ||
850 | * sizeof(struct pm_signal))); | ||
851 | |||
852 | if (unlikely(ret)) { | ||
853 | printk(KERN_WARNING "%s: rtas returned: %d\n", | ||
854 | __FUNCTION__, ret); | ||
855 | return -EIO; | ||
856 | } | ||
857 | |||
858 | return 0; | ||
859 | } | ||
860 | |||
861 | #ifdef CONFIG_CPU_FREQ | ||
862 | static int | ||
863 | oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) | ||
864 | { | ||
865 | int ret = 0; | ||
866 | struct cpufreq_freqs *frq = data; | ||
867 | if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || | ||
868 | (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) || | ||
869 | (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) | ||
870 | set_spu_profiling_frequency(frq->new, spu_cycle_reset); | ||
871 | return ret; | ||
872 | } | ||
873 | |||
874 | static struct notifier_block cpu_freq_notifier_block = { | ||
875 | .notifier_call = oprof_cpufreq_notify | ||
876 | }; | ||
877 | #endif | ||
878 | |||
879 | static int cell_global_start_spu(struct op_counter_config *ctr) | ||
880 | { | ||
881 | int subfunc; | ||
882 | unsigned int lfsr_value; | ||
883 | int cpu; | ||
884 | int ret; | ||
885 | int rtas_error; | ||
886 | unsigned int cpu_khzfreq = 0; | ||
887 | |||
888 | /* The SPU profiling uses time-based profiling based on | ||
889 | * cpu frequency, so if configured with the CPU_FREQ | ||
890 | * option, we should detect frequency changes and react | ||
891 | * accordingly. | ||
892 | */ | ||
893 | #ifdef CONFIG_CPU_FREQ | ||
894 | ret = cpufreq_register_notifier(&cpu_freq_notifier_block, | ||
895 | CPUFREQ_TRANSITION_NOTIFIER); | ||
896 | if (ret < 0) | ||
897 | /* this is not a fatal error */ | ||
898 | printk(KERN_ERR "CPU freq change registration failed: %d\n", | ||
899 | ret); | ||
900 | |||
901 | else | ||
902 | cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); | ||
903 | #endif | ||
904 | |||
905 | set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); | ||
906 | |||
907 | for_each_online_cpu(cpu) { | ||
908 | if (cbe_get_hw_thread_id(cpu)) | ||
909 | continue; | ||
910 | |||
911 | /* | ||
912 | * Setup SPU cycle-based profiling. | ||
913 | * Set perf_mon_control bit 0 to a zero before | ||
914 | * enabling spu collection hardware. | ||
915 | */ | ||
916 | cbe_write_pm(cpu, pm_control, 0); | ||
917 | |||
918 | if (spu_cycle_reset > MAX_SPU_COUNT) | ||
919 | /* use largest possible value */ | ||
920 | lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); | ||
921 | else | ||
922 | lfsr_value = calculate_lfsr(spu_cycle_reset); | ||
923 | |||
924 | /* must use a non zero value. Zero disables data collection. */ | ||
925 | if (lfsr_value == 0) | ||
926 | lfsr_value = calculate_lfsr(1); | ||
927 | |||
928 | lfsr_value = lfsr_value << 8; /* shift lfsr to correct | ||
929 | * register location | ||
930 | */ | ||
931 | |||
932 | /* debug bus setup */ | ||
933 | ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); | ||
934 | |||
935 | if (unlikely(ret)) { | ||
936 | rtas_error = ret; | ||
937 | goto out; | ||
938 | } | ||
939 | |||
940 | |||
941 | subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ | ||
942 | |||
943 | /* start profiling */ | ||
944 | ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, | ||
945 | cbe_cpu_to_node(cpu), lfsr_value); | ||
946 | |||
947 | if (unlikely(ret != 0)) { | ||
948 | printk(KERN_ERR | ||
949 | "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", | ||
950 | __FUNCTION__, ret); | ||
951 | rtas_error = -EIO; | ||
952 | goto out; | ||
953 | } | ||
954 | } | ||
955 | |||
956 | rtas_error = start_spu_profiling(spu_cycle_reset); | ||
957 | if (rtas_error) | ||
958 | goto out_stop; | ||
959 | |||
960 | oprofile_running = 1; | ||
961 | return 0; | ||
962 | |||
963 | out_stop: | ||
964 | cell_global_stop_spu(); /* clean up the PMU/debug bus */ | ||
613 | out: | 965 | out: |
614 | ; | 966 | return rtas_error; |
615 | } | 967 | } |
616 | 968 | ||
617 | static void cell_global_start(struct op_counter_config *ctr) | 969 | static int cell_global_start_ppu(struct op_counter_config *ctr) |
618 | { | 970 | { |
619 | u32 cpu; | 971 | u32 cpu, i; |
620 | u32 interrupt_mask = 0; | 972 | u32 interrupt_mask = 0; |
621 | u32 i; | ||
622 | 973 | ||
623 | /* This routine gets called once for the system. | 974 | /* This routine gets called once for the system. |
624 | * There is one performance monitor per node, so we | 975 | * There is one performance monitor per node, so we |
@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr) | |||
651 | oprofile_running = 1; | 1002 | oprofile_running = 1; |
652 | smp_wmb(); | 1003 | smp_wmb(); |
653 | 1004 | ||
654 | /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being | 1005 | /* |
655 | * executed which manipulates the PMU. We start the "virtual counter" | 1006 | * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being |
1007 | * executed which manipulates the PMU. We start the "virtual counter" | ||
656 | * here so that we do not need to synchronize access to the PMU in | 1008 | * here so that we do not need to synchronize access to the PMU in |
657 | * the above for-loop. | 1009 | * the above for-loop. |
658 | */ | 1010 | */ |
659 | start_virt_cntrs(); | 1011 | start_virt_cntrs(); |
1012 | |||
1013 | return 0; | ||
660 | } | 1014 | } |
661 | 1015 | ||
662 | static void cell_global_stop(void) | 1016 | static int cell_global_start(struct op_counter_config *ctr) |
1017 | { | ||
1018 | if (spu_cycle_reset) | ||
1019 | return cell_global_start_spu(ctr); | ||
1020 | else | ||
1021 | return cell_global_start_ppu(ctr); | ||
1022 | } | ||
1023 | |||
1024 | /* | ||
1025 | * Note the generic OProfile stop calls do not support returning | ||
1026 | * an error on stop. Hence, will not return an error if the FW | ||
1027 | * calls fail on stop. Failure to reset the debug bus is not an issue. | ||
1028 | * Failure to disable the SPU profiling is not an issue. The FW calls | ||
1029 | * to enable the performance counters and debug bus will work even if | ||
1030 | * the hardware was not cleanly reset. | ||
1031 | */ | ||
1032 | static void cell_global_stop_spu(void) | ||
1033 | { | ||
1034 | int subfunc, rtn_value; | ||
1035 | unsigned int lfsr_value; | ||
1036 | int cpu; | ||
1037 | |||
1038 | oprofile_running = 0; | ||
1039 | |||
1040 | #ifdef CONFIG_CPU_FREQ | ||
1041 | cpufreq_unregister_notifier(&cpu_freq_notifier_block, | ||
1042 | CPUFREQ_TRANSITION_NOTIFIER); | ||
1043 | #endif | ||
1044 | |||
1045 | for_each_online_cpu(cpu) { | ||
1046 | if (cbe_get_hw_thread_id(cpu)) | ||
1047 | continue; | ||
1048 | |||
1049 | subfunc = 3; /* | ||
1050 | * 2 - activate SPU tracing, | ||
1051 | * 3 - deactivate | ||
1052 | */ | ||
1053 | lfsr_value = 0x8f100000; | ||
1054 | |||
1055 | rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, | ||
1056 | subfunc, cbe_cpu_to_node(cpu), | ||
1057 | lfsr_value); | ||
1058 | |||
1059 | if (unlikely(rtn_value != 0)) { | ||
1060 | printk(KERN_ERR | ||
1061 | "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", | ||
1062 | __FUNCTION__, rtn_value); | ||
1063 | } | ||
1064 | |||
1065 | /* Deactivate the signals */ | ||
1066 | pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); | ||
1067 | } | ||
1068 | |||
1069 | stop_spu_profiling(); | ||
1070 | } | ||
1071 | |||
1072 | static void cell_global_stop_ppu(void) | ||
663 | { | 1073 | { |
664 | int cpu; | 1074 | int cpu; |
665 | 1075 | ||
666 | /* This routine will be called once for the system. | 1076 | /* |
1077 | * This routine will be called once for the system. | ||
667 | * There is one performance monitor per node, so we | 1078 | * There is one performance monitor per node, so we |
668 | * only need to perform this function once per node. | 1079 | * only need to perform this function once per node. |
669 | */ | 1080 | */ |
@@ -687,8 +1098,16 @@ static void cell_global_stop(void) | |||
687 | } | 1098 | } |
688 | } | 1099 | } |
689 | 1100 | ||
690 | static void | 1101 | static void cell_global_stop(void) |
691 | cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | 1102 | { |
1103 | if (spu_cycle_reset) | ||
1104 | cell_global_stop_spu(); | ||
1105 | else | ||
1106 | cell_global_stop_ppu(); | ||
1107 | } | ||
1108 | |||
1109 | static void cell_handle_interrupt(struct pt_regs *regs, | ||
1110 | struct op_counter_config *ctr) | ||
692 | { | 1111 | { |
693 | u32 cpu; | 1112 | u32 cpu; |
694 | u64 pc; | 1113 | u64 pc; |
@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
699 | 1118 | ||
700 | cpu = smp_processor_id(); | 1119 | cpu = smp_processor_id(); |
701 | 1120 | ||
702 | /* Need to make sure the interrupt handler and the virt counter | 1121 | /* |
1122 | * Need to make sure the interrupt handler and the virt counter | ||
703 | * routine are not running at the same time. See the | 1123 | * routine are not running at the same time. See the |
704 | * cell_virtual_cntr() routine for additional comments. | 1124 | * cell_virtual_cntr() routine for additional comments. |
705 | */ | 1125 | */ |
706 | spin_lock_irqsave(&virt_cntr_lock, flags); | 1126 | spin_lock_irqsave(&virt_cntr_lock, flags); |
707 | 1127 | ||
708 | /* Need to disable and reenable the performance counters | 1128 | /* |
1129 | * Need to disable and reenable the performance counters | ||
709 | * to get the desired behavior from the hardware. This | 1130 | * to get the desired behavior from the hardware. This |
710 | * is hardware specific. | 1131 | * is hardware specific. |
711 | */ | 1132 | */ |
@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
714 | 1135 | ||
715 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); | 1136 | interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); |
716 | 1137 | ||
717 | /* If the interrupt mask has been cleared, then the virt cntr | 1138 | /* |
1139 | * If the interrupt mask has been cleared, then the virt cntr | ||
718 | * has cleared the interrupt. When the thread that generated | 1140 | * has cleared the interrupt. When the thread that generated |
719 | * the interrupt is restored, the data count will be restored to | 1141 | * the interrupt is restored, the data count will be restored to |
720 | * 0xfffffff0 to cause the interrupt to be regenerated. | 1142 | * 0xfffffff0 to cause the interrupt to be regenerated. |
@@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
732 | } | 1154 | } |
733 | } | 1155 | } |
734 | 1156 | ||
735 | /* The counters were frozen by the interrupt. | 1157 | /* |
1158 | * The counters were frozen by the interrupt. | ||
736 | * Reenable the interrupt and restart the counters. | 1159 | * Reenable the interrupt and restart the counters. |
737 | * If there was a race between the interrupt handler and | 1160 | * If there was a race between the interrupt handler and |
738 | * the virtual counter routine. The virtual counter | 1161 | * the virtual counter routine. The virtual counter |
739 | * routine may have cleared the interrupts. Hence must | 1162 | * routine may have cleared the interrupts. Hence must |
740 | * use the virt_cntr_inter_mask to re-enable the interrupts. | 1163 | * use the virt_cntr_inter_mask to re-enable the interrupts. |
741 | */ | 1164 | */ |
742 | cbe_enable_pm_interrupts(cpu, hdw_thread, | 1165 | cbe_enable_pm_interrupts(cpu, hdw_thread, |
743 | virt_cntr_inter_mask); | 1166 | virt_cntr_inter_mask); |
744 | 1167 | ||
745 | /* The writes to the various performance counters only writes | 1168 | /* |
746 | * to a latch. The new values (interrupt setting bits, reset | 1169 | * The writes to the various performance counters only writes |
1170 | * to a latch. The new values (interrupt setting bits, reset | ||
747 | * counter value etc.) are not copied to the actual registers | 1171 | * counter value etc.) are not copied to the actual registers |
748 | * until the performance monitor is enabled. In order to get | 1172 | * until the performance monitor is enabled. In order to get |
749 | * this to work as desired, the performance monitor needs to | 1173 | * this to work as desired, the performance monitor needs to |
@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) | |||
755 | spin_unlock_irqrestore(&virt_cntr_lock, flags); | 1179 | spin_unlock_irqrestore(&virt_cntr_lock, flags); |
756 | } | 1180 | } |
757 | 1181 | ||
1182 | /* | ||
1183 | * This function is called from the generic OProfile | ||
1184 | * driver. When profiling PPUs, we need to do the | ||
1185 | * generic sync start; otherwise, do spu_sync_start. | ||
1186 | */ | ||
1187 | static int cell_sync_start(void) | ||
1188 | { | ||
1189 | if (spu_cycle_reset) | ||
1190 | return spu_sync_start(); | ||
1191 | else | ||
1192 | return DO_GENERIC_SYNC; | ||
1193 | } | ||
1194 | |||
1195 | static int cell_sync_stop(void) | ||
1196 | { | ||
1197 | if (spu_cycle_reset) | ||
1198 | return spu_sync_stop(); | ||
1199 | else | ||
1200 | return 1; | ||
1201 | } | ||
1202 | |||
758 | struct op_powerpc_model op_model_cell = { | 1203 | struct op_powerpc_model op_model_cell = { |
759 | .reg_setup = cell_reg_setup, | 1204 | .reg_setup = cell_reg_setup, |
760 | .cpu_setup = cell_cpu_setup, | 1205 | .cpu_setup = cell_cpu_setup, |
761 | .global_start = cell_global_start, | 1206 | .global_start = cell_global_start, |
762 | .global_stop = cell_global_stop, | 1207 | .global_stop = cell_global_stop, |
1208 | .sync_start = cell_sync_start, | ||
1209 | .sync_stop = cell_sync_stop, | ||
763 | .handle_interrupt = cell_handle_interrupt, | 1210 | .handle_interrupt = cell_handle_interrupt, |
764 | }; | 1211 | }; |
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c index 2267eb8c661b..183a28bb1812 100644 --- a/arch/powerpc/oprofile/op_model_fsl_booke.c +++ b/arch/powerpc/oprofile/op_model_fsl_booke.c | |||
@@ -244,7 +244,7 @@ static void dump_pmcs(void) | |||
244 | mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); | 244 | mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); |
245 | } | 245 | } |
246 | 246 | ||
247 | static void fsl_booke_cpu_setup(struct op_counter_config *ctr) | 247 | static int fsl_booke_cpu_setup(struct op_counter_config *ctr) |
248 | { | 248 | { |
249 | int i; | 249 | int i; |
250 | 250 | ||
@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr) | |||
258 | 258 | ||
259 | set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); | 259 | set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); |
260 | } | 260 | } |
261 | |||
262 | return 0; | ||
261 | } | 263 | } |
262 | 264 | ||
263 | static void fsl_booke_reg_setup(struct op_counter_config *ctr, | 265 | static int fsl_booke_reg_setup(struct op_counter_config *ctr, |
264 | struct op_system_config *sys, | 266 | struct op_system_config *sys, |
265 | int num_ctrs) | 267 | int num_ctrs) |
266 | { | 268 | { |
@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr, | |||
276 | for (i = 0; i < num_counters; ++i) | 278 | for (i = 0; i < num_counters; ++i) |
277 | reset_value[i] = 0x80000000UL - ctr[i].count; | 279 | reset_value[i] = 0x80000000UL - ctr[i].count; |
278 | 280 | ||
281 | return 0; | ||
279 | } | 282 | } |
280 | 283 | ||
281 | static void fsl_booke_start(struct op_counter_config *ctr) | 284 | static int fsl_booke_start(struct op_counter_config *ctr) |
282 | { | 285 | { |
283 | int i; | 286 | int i; |
284 | 287 | ||
@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr) | |||
308 | 311 | ||
309 | pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), | 312 | pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), |
310 | mfpmr(PMRN_PMGC0)); | 313 | mfpmr(PMRN_PMGC0)); |
314 | |||
315 | return 0; | ||
311 | } | 316 | } |
312 | 317 | ||
313 | static void fsl_booke_stop(void) | 318 | static void fsl_booke_stop(void) |
diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c index e8a56b0adadc..c40de461fd4e 100644 --- a/arch/powerpc/oprofile/op_model_pa6t.c +++ b/arch/powerpc/oprofile/op_model_pa6t.c | |||
@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val) | |||
89 | 89 | ||
90 | 90 | ||
91 | /* precompute the values to stuff in the hardware registers */ | 91 | /* precompute the values to stuff in the hardware registers */ |
92 | static void pa6t_reg_setup(struct op_counter_config *ctr, | 92 | static int pa6t_reg_setup(struct op_counter_config *ctr, |
93 | struct op_system_config *sys, | 93 | struct op_system_config *sys, |
94 | int num_ctrs) | 94 | int num_ctrs) |
95 | { | 95 | { |
@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr, | |||
135 | pr_debug("reset_value for pmc%u inited to 0x%lx\n", | 135 | pr_debug("reset_value for pmc%u inited to 0x%lx\n", |
136 | pmc, reset_value[pmc]); | 136 | pmc, reset_value[pmc]); |
137 | } | 137 | } |
138 | |||
139 | return 0; | ||
138 | } | 140 | } |
139 | 141 | ||
140 | /* configure registers on this cpu */ | 142 | /* configure registers on this cpu */ |
141 | static void pa6t_cpu_setup(struct op_counter_config *ctr) | 143 | static int pa6t_cpu_setup(struct op_counter_config *ctr) |
142 | { | 144 | { |
143 | u64 mmcr0 = mmcr0_val; | 145 | u64 mmcr0 = mmcr0_val; |
144 | u64 mmcr1 = mmcr1_val; | 146 | u64 mmcr1 = mmcr1_val; |
@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr) | |||
154 | mfspr(SPRN_PA6T_MMCR0)); | 156 | mfspr(SPRN_PA6T_MMCR0)); |
155 | pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), | 157 | pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), |
156 | mfspr(SPRN_PA6T_MMCR1)); | 158 | mfspr(SPRN_PA6T_MMCR1)); |
159 | |||
160 | return 0; | ||
157 | } | 161 | } |
158 | 162 | ||
159 | static void pa6t_start(struct op_counter_config *ctr) | 163 | static int pa6t_start(struct op_counter_config *ctr) |
160 | { | 164 | { |
161 | int i; | 165 | int i; |
162 | 166 | ||
@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr) | |||
174 | oprofile_running = 1; | 178 | oprofile_running = 1; |
175 | 179 | ||
176 | pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); | 180 | pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); |
181 | |||
182 | return 0; | ||
177 | } | 183 | } |
178 | 184 | ||
179 | static void pa6t_stop(void) | 185 | static void pa6t_stop(void) |
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index a7c206b665af..cddc250a6a5c 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c | |||
@@ -32,7 +32,7 @@ static u32 mmcr0_val; | |||
32 | static u64 mmcr1_val; | 32 | static u64 mmcr1_val; |
33 | static u64 mmcra_val; | 33 | static u64 mmcra_val; |
34 | 34 | ||
35 | static void power4_reg_setup(struct op_counter_config *ctr, | 35 | static int power4_reg_setup(struct op_counter_config *ctr, |
36 | struct op_system_config *sys, | 36 | struct op_system_config *sys, |
37 | int num_ctrs) | 37 | int num_ctrs) |
38 | { | 38 | { |
@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr, | |||
60 | mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; | 60 | mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; |
61 | else | 61 | else |
62 | mmcr0_val |= MMCR0_PROBLEM_DISABLE; | 62 | mmcr0_val |= MMCR0_PROBLEM_DISABLE; |
63 | |||
64 | return 0; | ||
63 | } | 65 | } |
64 | 66 | ||
65 | extern void ppc64_enable_pmcs(void); | 67 | extern void ppc64_enable_pmcs(void); |
@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void) | |||
84 | return 0; | 86 | return 0; |
85 | } | 87 | } |
86 | 88 | ||
87 | static void power4_cpu_setup(struct op_counter_config *ctr) | 89 | static int power4_cpu_setup(struct op_counter_config *ctr) |
88 | { | 90 | { |
89 | unsigned int mmcr0 = mmcr0_val; | 91 | unsigned int mmcr0 = mmcr0_val; |
90 | unsigned long mmcra = mmcra_val; | 92 | unsigned long mmcra = mmcra_val; |
@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr) | |||
111 | mfspr(SPRN_MMCR1)); | 113 | mfspr(SPRN_MMCR1)); |
112 | dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), | 114 | dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), |
113 | mfspr(SPRN_MMCRA)); | 115 | mfspr(SPRN_MMCRA)); |
116 | |||
117 | return 0; | ||
114 | } | 118 | } |
115 | 119 | ||
116 | static void power4_start(struct op_counter_config *ctr) | 120 | static int power4_start(struct op_counter_config *ctr) |
117 | { | 121 | { |
118 | int i; | 122 | int i; |
119 | unsigned int mmcr0; | 123 | unsigned int mmcr0; |
@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr) | |||
148 | oprofile_running = 1; | 152 | oprofile_running = 1; |
149 | 153 | ||
150 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); | 154 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); |
155 | return 0; | ||
151 | } | 156 | } |
152 | 157 | ||
153 | static void power4_stop(void) | 158 | static void power4_stop(void) |
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c index c731acbfb2a5..a20afe45d936 100644 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ b/arch/powerpc/oprofile/op_model_rs64.c | |||
@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER]; | |||
88 | 88 | ||
89 | static int num_counters; | 89 | static int num_counters; |
90 | 90 | ||
91 | static void rs64_reg_setup(struct op_counter_config *ctr, | 91 | static int rs64_reg_setup(struct op_counter_config *ctr, |
92 | struct op_system_config *sys, | 92 | struct op_system_config *sys, |
93 | int num_ctrs) | 93 | int num_ctrs) |
94 | { | 94 | { |
@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr, | |||
100 | reset_value[i] = 0x80000000UL - ctr[i].count; | 100 | reset_value[i] = 0x80000000UL - ctr[i].count; |
101 | 101 | ||
102 | /* XXX setup user and kernel profiling */ | 102 | /* XXX setup user and kernel profiling */ |
103 | return 0; | ||
103 | } | 104 | } |
104 | 105 | ||
105 | static void rs64_cpu_setup(struct op_counter_config *ctr) | 106 | static int rs64_cpu_setup(struct op_counter_config *ctr) |
106 | { | 107 | { |
107 | unsigned int mmcr0; | 108 | unsigned int mmcr0; |
108 | 109 | ||
@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr) | |||
125 | mfspr(SPRN_MMCR0)); | 126 | mfspr(SPRN_MMCR0)); |
126 | dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), | 127 | dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), |
127 | mfspr(SPRN_MMCR1)); | 128 | mfspr(SPRN_MMCR1)); |
129 | |||
130 | return 0; | ||
128 | } | 131 | } |
129 | 132 | ||
130 | static void rs64_start(struct op_counter_config *ctr) | 133 | static int rs64_start(struct op_counter_config *ctr) |
131 | { | 134 | { |
132 | int i; | 135 | int i; |
133 | unsigned int mmcr0; | 136 | unsigned int mmcr0; |
@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr) | |||
155 | mtspr(SPRN_MMCR0, mmcr0); | 158 | mtspr(SPRN_MMCR0, mmcr0); |
156 | 159 | ||
157 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); | 160 | dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); |
161 | return 0; | ||
158 | } | 162 | } |
159 | 163 | ||
160 | static void rs64_stop(void) | 164 | static void rs64_stop(void) |
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index a7efb999d65e..6694f86d7000 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/fs.h> | 23 | #include <linux/fs.h> |
24 | #include <linux/mm.h> | 24 | #include <linux/mm.h> |
25 | #include <linux/module.h> | ||
25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
26 | #include <asm/atomic.h> | 27 | #include <asm/atomic.h> |
27 | #include <asm/spu.h> | 28 | #include <asm/spu.h> |
@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref) | |||
81 | spu_fini_csa(&ctx->csa); | 82 | spu_fini_csa(&ctx->csa); |
82 | if (ctx->gang) | 83 | if (ctx->gang) |
83 | spu_gang_remove_ctx(ctx->gang, ctx); | 84 | spu_gang_remove_ctx(ctx->gang, ctx); |
85 | if (ctx->prof_priv_kref) | ||
86 | kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); | ||
84 | BUG_ON(!list_empty(&ctx->rq)); | 87 | BUG_ON(!list_empty(&ctx->rq)); |
85 | atomic_dec(&nr_spu_contexts); | 88 | atomic_dec(&nr_spu_contexts); |
86 | kfree(ctx); | 89 | kfree(ctx); |
@@ -185,3 +188,20 @@ void spu_release_saved(struct spu_context *ctx) | |||
185 | 188 | ||
186 | spu_release(ctx); | 189 | spu_release(ctx); |
187 | } | 190 | } |
191 | |||
192 | void spu_set_profile_private_kref(struct spu_context *ctx, | ||
193 | struct kref *prof_info_kref, | ||
194 | void ( * prof_info_release) (struct kref *kref)) | ||
195 | { | ||
196 | ctx->prof_priv_kref = prof_info_kref; | ||
197 | ctx->prof_priv_release = prof_info_release; | ||
198 | } | ||
199 | EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); | ||
200 | |||
201 | void *spu_get_profile_private_kref(struct spu_context *ctx) | ||
202 | { | ||
203 | return ctx->prof_priv_kref; | ||
204 | } | ||
205 | EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); | ||
206 | |||
207 | |||
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 88ec333e90d3..44e2338a05d5 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c | |||
@@ -274,6 +274,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) | |||
274 | ctx->spu = spu; | 274 | ctx->spu = spu; |
275 | ctx->ops = &spu_hw_ops; | 275 | ctx->ops = &spu_hw_ops; |
276 | spu->pid = current->pid; | 276 | spu->pid = current->pid; |
277 | spu->tgid = current->tgid; | ||
277 | spu_associate_mm(spu, ctx->owner); | 278 | spu_associate_mm(spu, ctx->owner); |
278 | spu->ibox_callback = spufs_ibox_callback; | 279 | spu->ibox_callback = spufs_ibox_callback; |
279 | spu->wbox_callback = spufs_wbox_callback; | 280 | spu->wbox_callback = spufs_wbox_callback; |
@@ -456,6 +457,7 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) | |||
456 | spu->dma_callback = NULL; | 457 | spu->dma_callback = NULL; |
457 | spu_associate_mm(spu, NULL); | 458 | spu_associate_mm(spu, NULL); |
458 | spu->pid = 0; | 459 | spu->pid = 0; |
460 | spu->tgid = 0; | ||
459 | ctx->ops = &spu_backing_ops; | 461 | ctx->ops = &spu_backing_ops; |
460 | spu->flags = 0; | 462 | spu->flags = 0; |
461 | spu->ctx = NULL; | 463 | spu->ctx = NULL; |
@@ -737,7 +739,7 @@ void spu_deactivate(struct spu_context *ctx) | |||
737 | } | 739 | } |
738 | 740 | ||
739 | /** | 741 | /** |
740 | * spu_yield - yield a physical spu if others are waiting | 742 | * spu_yield - yield a physical spu if others are waiting |
741 | * @ctx: spu context to yield | 743 | * @ctx: spu context to yield |
742 | * | 744 | * |
743 | * Check if there is a higher priority context waiting and if yes | 745 | * Check if there is a higher priority context waiting and if yes |
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 692dbd0edc37..8b20c0c1556f 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h | |||
@@ -85,6 +85,8 @@ struct spu_context { | |||
85 | 85 | ||
86 | struct list_head gang_list; | 86 | struct list_head gang_list; |
87 | struct spu_gang *gang; | 87 | struct spu_gang *gang; |
88 | struct kref *prof_priv_kref; | ||
89 | void ( * prof_priv_release) (struct kref *kref); | ||
88 | 90 | ||
89 | /* owner thread */ | 91 | /* owner thread */ |
90 | pid_t tid; | 92 | pid_t tid; |