authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-20 16:45:53 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-20 16:45:53 -0400
commitede13d81b4dda409a6d271b34b8e2ec9383e255d (patch)
tree2e32142d5a8e076c64f1871f5ad162fffff5f357 /arch
parent2008220879af095d00ca27eb168a55c8595fbc0b (diff)
parent486acd4850dde6d2f8c7f431432f3914c4bfb5f5 (diff)
Merge branch 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6
* 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6: (37 commits)
  [CELL] spufs: rework list management and associated locking
  [CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
  [CELL] oprofile: enable SPU switch notification to detect currently active SPU tasks
  [CELL] spu_base: locking cleanup
  [CELL] cell: indexing of SPUs based on firmware vicinity properties
  [CELL] spufs: integration of SPE affinity with the scheduller
  [CELL] cell: add placement computation for scheduling of affinity contexts
  [CELL] spufs: extension of spu_create to support affinity definition
  [CELL] cell: add hardcoded spu vicinity information for QS20
  [CELL] cell: add vicinity information on spus
  [CELL] cell: add per BE structure with info about its SPUs
  [CELL] spufs: use find_first_bit() instead of sched_find_first_bit()
  [CELL] spufs: remove unused file argument from spufs_run_spu()
  [CELL] spufs: change decrementer restore timing
  [CELL] spufs: dont halt decrementer at restore step 47
  [CELL] spufs: limit saving MFC_CNTL bits
  [CELL] spufs: fix read and write for decr_status file
  [CELL] spufs: fix decr_status meanings
  [CELL] spufs: remove needless context save/restore code
  [CELL] spufs: fix array size of channel index
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/configs/cell_defconfig | 3
-rw-r--r--  arch/powerpc/kernel/crash.c | 67
-rw-r--r--  arch/powerpc/kernel/time.c | 1
-rw-r--r--  arch/powerpc/oprofile/Kconfig | 7
-rw-r--r--  arch/powerpc/oprofile/Makefile | 4
-rw-r--r--  arch/powerpc/oprofile/cell/pr_util.h | 97
-rw-r--r--  arch/powerpc/oprofile/cell/spu_profiler.c | 221
-rw-r--r--  arch/powerpc/oprofile/cell/spu_task_sync.c | 484
-rw-r--r--  arch/powerpc/oprofile/cell/vma_map.c | 287
-rw-r--r--  arch/powerpc/oprofile/common.c | 51
-rw-r--r--  arch/powerpc/oprofile/op_model_7450.c | 14
-rw-r--r--  arch/powerpc/oprofile/op_model_cell.c | 607
-rw-r--r--  arch/powerpc/oprofile/op_model_fsl_booke.c | 11
-rw-r--r--  arch/powerpc/oprofile/op_model_pa6t.c | 12
-rw-r--r--  arch/powerpc/oprofile/op_model_power4.c | 11
-rw-r--r--  arch/powerpc/oprofile/op_model_rs64.c | 10
-rw-r--r--  arch/powerpc/platforms/Kconfig | 10
-rw-r--r--  arch/powerpc/platforms/cell/Kconfig | 10
-rw-r--r--  arch/powerpc/platforms/cell/Makefile | 6
-rw-r--r--  arch/powerpc/platforms/cell/axon_msi.c | 445
-rw-r--r--  arch/powerpc/platforms/cell/cbe_cpufreq.c | 217
-rw-r--r--  arch/powerpc/platforms/cell/cbe_cpufreq.h | 24
-rw-r--r--  arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c | 115
-rw-r--r--  arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c | 148
-rw-r--r--  arch/powerpc/platforms/cell/cbe_regs.c | 7
-rw-r--r--  arch/powerpc/platforms/cell/cbe_thermal.c | 25
-rw-r--r--  arch/powerpc/platforms/cell/spu_base.c | 295
-rw-r--r--  arch/powerpc/platforms/cell/spu_syscalls.c | 17
-rw-r--r--  arch/powerpc/platforms/cell/spufs/context.c | 42
-rw-r--r--  arch/powerpc/platforms/cell/spufs/coredump.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/fault.c | 8
-rw-r--r--  arch/powerpc/platforms/cell/spufs/file.c | 104
-rw-r--r--  arch/powerpc/platforms/cell/spufs/gang.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 132
-rw-r--r--  arch/powerpc/platforms/cell/spufs/run.c | 36
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 377
-rw-r--r--  arch/powerpc/platforms/cell/spufs/spu_restore.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped | 480
-rw-r--r--  arch/powerpc/platforms/cell/spufs/spufs.h | 99
-rw-r--r--  arch/powerpc/platforms/cell/spufs/switch.c | 72
-rw-r--r--  arch/powerpc/platforms/cell/spufs/syscalls.c | 34
-rw-r--r--  arch/powerpc/sysdev/Makefile | 1
-rw-r--r--  arch/powerpc/sysdev/axonram.c | 381
-rw-r--r--  arch/powerpc/sysdev/pmi.c | 51
44 files changed, 4166 insertions, 871 deletions
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 74f83f4a4e5e..d9ac24e8de16 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
 # Instrumentation Support
 #
 CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
+CONFIG_OPROFILE_CELL=y
 # CONFIG_KPROBES is not set
 
 #
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index d3f2080d2eee..37658ea417fa 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs)
 	cpus_in_sr = CPU_MASK_NONE;
 }
 #endif
+#ifdef CONFIG_SPU_BASE
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+
+struct crash_spu_info {
+	struct spu *spu;
+	u32 saved_spu_runcntl_RW;
+	u32 saved_spu_status_R;
+	u32 saved_spu_npc_RW;
+	u64 saved_mfc_sr1_RW;
+	u64 saved_mfc_dar;
+	u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS	16	/* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+static void crash_kexec_stop_spus(void)
+{
+	struct spu *spu;
+	int i;
+	u64 tmp;
+
+	for (i = 0; i < CRASH_NUM_SPUS; i++) {
+		if (!crash_spu_info[i].spu)
+			continue;
+
+		spu = crash_spu_info[i].spu;
+
+		crash_spu_info[i].saved_spu_runcntl_RW =
+			in_be32(&spu->problem->spu_runcntl_RW);
+		crash_spu_info[i].saved_spu_status_R =
+			in_be32(&spu->problem->spu_status_R);
+		crash_spu_info[i].saved_spu_npc_RW =
+			in_be32(&spu->problem->spu_npc_RW);
+
+		crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
+		crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
+		tmp = spu_mfc_sr1_get(spu);
+		crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+		tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+		spu_mfc_sr1_set(spu, tmp);
+
+		__delay(200);
+	}
+}
+
+void crash_register_spus(struct list_head *list)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, list, full_list) {
+		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+			continue;
+
+		crash_spu_info[spu->number].spu = spu;
+	}
+}
+
+#else
+static inline void crash_kexec_stop_spus(void)
+{
+}
+#endif /* CONFIG_SPU_BASE */
 
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
@@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus(crashing_cpu);
 	cpu_set(crashing_cpu, cpus_in_crash);
+	crash_kexec_stop_spus();
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(1, 0);
 }
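
The SPUs have to be quiesced before the kdump kernel takes over, but crash_kexec_stop_spus() only touches SPUs that were registered beforehand. The registration call lives in the Cell SPU base code (its hunk is not shown on this page); a minimal sketch of such a caller, with a hypothetical list name, is:

/* Sketch only: 'example_spu_list' is an illustrative name, not from the patch. */
#include <linux/list.h>
#include <asm/spu.h>

extern void crash_register_spus(struct list_head *list);

static LIST_HEAD(example_spu_list);	/* struct spu entries linked via ->full_list */

static int __init example_spu_crash_setup(void)
{
	/* hand the full SPU list to the kdump code once all SPUs are probed */
	crash_register_spus(&example_spu_list);
	return 0;
}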
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e5df167f7824..727a6699f2f4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
 static long timezone_offset;
 
 unsigned long ppc_proc_freq;
+EXPORT_SYMBOL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 
 static u64 tb_last_jiffy __cacheline_aligned_in_smp;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
index eb2dece76a54..7089e79689b9 100644
--- a/arch/powerpc/oprofile/Kconfig
+++ b/arch/powerpc/oprofile/Kconfig
@@ -15,3 +15,10 @@ config OPROFILE
 
 	  If unsure, say N.
 
+config OPROFILE_CELL
+	bool "OProfile for Cell Broadband Engine"
+	depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
+	default y
+	help
+	  Profiling of Cell BE SPUs requires special support enabled
+	  by this option.
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 4b5f9528218c..c5f64c3bd668 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
-oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
+oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
+					cell/spu_profiler.o cell/vma_map.o \
+					cell/spu_task_sync.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
new file mode 100644
index 000000000000..e5704f00c8b4
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -0,0 +1,97 @@
1 /*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Author: Maynard Johnson <maynardj@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef PR_UTIL_H
15#define PR_UTIL_H
16
17#include <linux/cpumask.h>
18#include <linux/oprofile.h>
19#include <asm/cell-pmu.h>
20#include <asm/spu.h>
21
22#include "../../platforms/cell/cbe_regs.h"
23
24/* Defines used for sync_start */
25#define SKIP_GENERIC_SYNC 0
26#define SYNC_START_ERROR -1
27#define DO_GENERIC_SYNC 1
28
29struct spu_overlay_info { /* map of sections within an SPU overlay */
30 unsigned int vma; /* SPU virtual memory address from elf */
31 unsigned int size; /* size of section from elf */
32 unsigned int offset; /* offset of section into elf file */
33 unsigned int buf;
34};
35
36struct vma_to_fileoffset_map { /* map of sections within an SPU program */
37 struct vma_to_fileoffset_map *next; /* list pointer */
38 unsigned int vma; /* SPU virtual memory address from elf */
39 unsigned int size; /* size of section from elf */
40 unsigned int offset; /* offset of section into elf file */
41 unsigned int guard_ptr;
42 unsigned int guard_val;
43 /*
44 * The guard pointer is an entry in the _ovly_buf_table,
45 * computed using ovly.buf as the index into the table. Since
46 * ovly.buf values begin at '1' to reference the first (or 0th)
47 * entry in the _ovly_buf_table, the computation subtracts 1
48 * from ovly.buf.
49 * The guard value is stored in the _ovly_buf_table entry and
50 * is an index (starting at 1) back to the _ovly_table entry
51 * that is pointing at this _ovly_buf_table entry. So, for
52 * example, for an overlay scenario with one overlay segment
53 * and two overlay sections:
54 * - Section 1 points to the first entry of the
55 * _ovly_buf_table, which contains a guard value
56 * of '1', referencing the first (index=0) entry of
57 * _ovly_table.
58 * - Section 2 points to the second entry of the
59 * _ovly_buf_table, which contains a guard value
60 * of '2', referencing the second (index=1) entry of
61 * _ovly_table.
62 */
63
64};
65
66/* The three functions below are for maintaining and accessing
67 * the vma-to-fileoffset map.
68 */
69struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
70 u64 objectid);
71unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
72 unsigned int vma, const struct spu *aSpu,
73 int *grd_val);
74void vma_map_free(struct vma_to_fileoffset_map *map);
75
76/*
77 * Entry point for SPU profiling.
78 * cycles_reset is the SPU_CYCLES count value specified by the user.
79 */
80int start_spu_profiling(unsigned int cycles_reset);
81
82void stop_spu_profiling(void);
83
84
85/* add the necessary profiling hooks */
86int spu_sync_start(void);
87
88/* remove the hooks */
89int spu_sync_stop(void);
90
91/* Record SPU program counter samples to the oprofile event buffer. */
92void spu_sync_buffer(int spu_num, unsigned int *samples,
93 int num_samples);
94
95void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
96
97#endif /* PR_UTIL_H */
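
The three map functions declared here are meant to be used together by the profiling code: build a map from the SPU ELF once per context, translate sampled program counters through it, and free it when the context goes away. A hedged usage sketch (the spu pointer, object id and sampled PC are assumed inputs, not part of this header):

/* Illustrative consumer of the vma-map API declared above. */
static unsigned int example_resolve_sample(const struct spu *spu,
					   u64 object_id, unsigned int spu_pc)
{
	struct vma_to_fileoffset_map *map;
	unsigned int file_offset;
	int guard_val = 0;

	map = create_vma_map(spu, object_id);	/* parse the SPU ELF once per context */
	if (!map)
		return 0;

	/* translate the program counter into an offset within the SPU ELF image */
	file_offset = vma_map_lookup(map, spu_pc, spu, &guard_val);

	vma_map_free(map);			/* drop the map when the context ends */
	return file_offset;
}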
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
new file mode 100644
index 000000000000..380d7e217531
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -0,0 +1,221 @@
1/*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Authors: Maynard Johnson <maynardj@us.ibm.com>
7 * Carl Love <carll@us.ibm.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <linux/hrtimer.h>
16#include <linux/smp.h>
17#include <linux/slab.h>
18#include <asm/cell-pmu.h>
19#include "pr_util.h"
20
21#define TRACE_ARRAY_SIZE 1024
22#define SCALE_SHIFT 14
23
24static u32 *samples;
25
26static int spu_prof_running;
27static unsigned int profiling_interval;
28
29#define NUM_SPU_BITS_TRBUF 16
30#define SPUS_PER_TB_ENTRY 4
31#define SPUS_PER_NODE 8
32
33#define SPU_PC_MASK 0xFFFF
34
35static DEFINE_SPINLOCK(sample_array_lock);
36unsigned long sample_array_lock_flags;
37
38void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
39{
40 unsigned long ns_per_cyc;
41
42 if (!freq_khz)
43 freq_khz = ppc_proc_freq/1000;
44
45 /* To calculate a timeout in nanoseconds, the basic
46 * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
47 * To avoid floating point math, we use the scale math
48 * technique as described in linux/jiffies.h. We use
49 * a scale factor of SCALE_SHIFT, which provides 4 decimal places
50 * of precision. This is close enough for the purpose at hand.
51 *
52 * The value of the timeout should be small enough that the hw
53 * trace buffer will not get more then about 1/3 full for the
54 * maximum user specified (the LFSR value) hw sampling frequency.
55 * This is to ensure the trace buffer will never fill even if the
56 * kernel thread scheduling varies under a heavy system load.
57 */
58
59 ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
60 profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
61
62}
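/* Worked example of the scaled arithmetic above (illustrative numbers, not
 * from the patch): with a 3.2 GHz Cell (freq_khz = 3200000) and
 * cycles_reset = 100000,
 *   ns_per_cyc         = (1000000 << 14) / 3200000  = 5120
 *   profiling_interval = (5120 * 100000) >> 14      = 31250 ns
 * which matches the exact value 100000 cycles / 3.2 GHz = 31.25 us.
 */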
63
64/*
65 * Extract SPU PC from trace buffer entry
66 */
67static void spu_pc_extract(int cpu, int entry)
68{
69 /* the trace buffer is 128 bits */
70 u64 trace_buffer[2];
71 u64 spu_mask;
72 int spu;
73
74 spu_mask = SPU_PC_MASK;
75
76 /* Each SPU PC is 16 bits; hence, four spus in each of
77 * the two 64-bit buffer entries that make up the
78 * 128-bit trace_buffer entry. Process two 64-bit values
79 * simultaneously.
80 * trace[0] SPU PC contents are: 0 1 2 3
81 * trace[1] SPU PC contents are: 4 5 6 7
82 */
83
84 cbe_read_trace_buffer(cpu, trace_buffer);
85
86 for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
87 /* spu PC trace entry is upper 16 bits of the
88 * 18 bit SPU program counter
89 */
90 samples[spu * TRACE_ARRAY_SIZE + entry]
91 = (spu_mask & trace_buffer[0]) << 2;
92 samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
93 = (spu_mask & trace_buffer[1]) << 2;
94
95 trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
96 trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
97 }
98}
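/* Worked example of the extraction above (illustrative value, not from the
 * patch): if trace_buffer[0] == 0x1111222233334444, the 16-bit PC fields are
 * SPU0=0x1111, SPU1=0x2222, SPU2=0x3333, SPU3=0x4444, with SPU0 in the most
 * significant bits.  The loop runs spu = 3..0, masking the low 16 bits and
 * shifting right by 16 each pass, and stores each field << 2 to rebuild the
 * 18-bit SPU program counter (e.g. 0x4444 << 2 == 0x11110 for SPU 3).
 */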
99
100static int cell_spu_pc_collection(int cpu)
101{
102 u32 trace_addr;
103 int entry;
104
105 /* process the collected SPU PC for the node */
106
107 entry = 0;
108
109 trace_addr = cbe_read_pm(cpu, trace_address);
110 while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
111 /* there is data in the trace buffer to process */
112 spu_pc_extract(cpu, entry);
113
114 entry++;
115
116 if (entry >= TRACE_ARRAY_SIZE)
117 /* spu_samples is full */
118 break;
119
120 trace_addr = cbe_read_pm(cpu, trace_address);
121 }
122
123 return entry;
124}
125
126
127static enum hrtimer_restart profile_spus(struct hrtimer *timer)
128{
129 ktime_t kt;
130 int cpu, node, k, num_samples, spu_num;
131
132 if (!spu_prof_running)
133 goto stop;
134
135 for_each_online_cpu(cpu) {
136 if (cbe_get_hw_thread_id(cpu))
137 continue;
138
139 node = cbe_cpu_to_node(cpu);
140
141 /* There should only be one kernel thread at a time processing
142 * the samples. In the very unlikely case that the processing
143 * is taking a very long time and multiple kernel threads are
144 * started to process the samples. Make sure only one kernel
145 * thread is working on the samples array at a time. The
146 * sample array must be loaded and then processed for a given
147 * cpu. The sample array is not per cpu.
148 */
149 spin_lock_irqsave(&sample_array_lock,
150 sample_array_lock_flags);
151 num_samples = cell_spu_pc_collection(cpu);
152
153 if (num_samples == 0) {
154 spin_unlock_irqrestore(&sample_array_lock,
155 sample_array_lock_flags);
156 continue;
157 }
158
159 for (k = 0; k < SPUS_PER_NODE; k++) {
160 spu_num = k + (node * SPUS_PER_NODE);
161 spu_sync_buffer(spu_num,
162 samples + (k * TRACE_ARRAY_SIZE),
163 num_samples);
164 }
165
166 spin_unlock_irqrestore(&sample_array_lock,
167 sample_array_lock_flags);
168
169 }
170 smp_wmb(); /* insure spu event buffer updates are written */
171 /* don't want events intermingled... */
172
173 kt = ktime_set(0, profiling_interval);
174 if (!spu_prof_running)
175 goto stop;
176 hrtimer_forward(timer, timer->base->get_time(), kt);
177 return HRTIMER_RESTART;
178
179 stop:
180 printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
181 return HRTIMER_NORESTART;
182}
183
184static struct hrtimer timer;
185/*
186 * Entry point for SPU profiling.
187 * NOTE: SPU profiling is done system-wide, not per-CPU.
188 *
189 * cycles_reset is the count value specified by the user when
190 * setting up OProfile to count SPU_CYCLES.
191 */
192int start_spu_profiling(unsigned int cycles_reset)
193{
194 ktime_t kt;
195
196 pr_debug("timer resolution: %lu\n", TICK_NSEC);
197 kt = ktime_set(0, profiling_interval);
198 hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
199 timer.expires = kt;
200 timer.function = profile_spus;
201
202 /* Allocate arrays for collecting SPU PC samples */
203 samples = kzalloc(SPUS_PER_NODE *
204 TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
205
206 if (!samples)
207 return -ENOMEM;
208
209 spu_prof_running = 1;
210 hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
211
212 return 0;
213}
214
215void stop_spu_profiling(void)
216{
217 spu_prof_running = 0;
218 hrtimer_cancel(&timer);
219 kfree(samples);
220 pr_debug("SPU_PROF: stop_spu_profiling issued\n");
221}
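
These entry points are driven by the Cell model code when the user selects the SPU_CYCLES event. The exact call sites are in op_model_cell.c and are not reproduced in this section; a rough sketch of the expected sequence, under that assumption:

static int example_spu_cycle_profiling(unsigned int spu_cycle_reset)
{
	int ret;

	/* derive the hrtimer period from the user's SPU_CYCLES reset count;
	 * passing 0 for freq_khz lets the code fall back to ppc_proc_freq */
	set_spu_profiling_frequency(0, spu_cycle_reset);

	ret = start_spu_profiling(spu_cycle_reset);	/* arms the hrtimer */
	if (ret)
		return ret;

	/* ... samples are collected by the timer callback ... */

	stop_spu_profiling();		/* cancels the timer and frees the sample array */
	return 0;
}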
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
new file mode 100644
index 000000000000..133665754a75
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -0,0 +1,484 @@
1/*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Author: Maynard Johnson <maynardj@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* The purpose of this file is to handle SPU event task switching
15 * and to record SPU context information into the OProfile
16 * event buffer.
17 *
18 * Additionally, the spu_sync_buffer function is provided as a helper
18 * for recording actual SPU program counter samples to the event buffer.
20 */
21#include <linux/dcookies.h>
22#include <linux/kref.h>
23#include <linux/mm.h>
24#include <linux/module.h>
25#include <linux/notifier.h>
26#include <linux/numa.h>
27#include <linux/oprofile.h>
28#include <linux/spinlock.h>
29#include "pr_util.h"
30
31#define RELEASE_ALL 9999
32
33static DEFINE_SPINLOCK(buffer_lock);
34static DEFINE_SPINLOCK(cache_lock);
35static int num_spu_nodes;
36int spu_prof_num_nodes;
37int last_guard_val[MAX_NUMNODES * 8];
38
39/* Container for caching information about an active SPU task. */
40struct cached_info {
41 struct vma_to_fileoffset_map *map;
42 struct spu *the_spu; /* needed to access pointer to local_store */
43 struct kref cache_ref;
44};
45
46static struct cached_info *spu_info[MAX_NUMNODES * 8];
47
48static void destroy_cached_info(struct kref *kref)
49{
50 struct cached_info *info;
51
52 info = container_of(kref, struct cached_info, cache_ref);
53 vma_map_free(info->map);
54 kfree(info);
55 module_put(THIS_MODULE);
56}
57
58/* Return the cached_info for the passed SPU number.
59 * ATTENTION: Callers are responsible for obtaining the
60 * cache_lock if needed prior to invoking this function.
61 */
62static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
63{
64 struct kref *ref;
65 struct cached_info *ret_info;
66
67 if (spu_num >= num_spu_nodes) {
68 printk(KERN_ERR "SPU_PROF: "
69 "%s, line %d: Invalid index %d into spu info cache\n",
70 __FUNCTION__, __LINE__, spu_num);
71 ret_info = NULL;
72 goto out;
73 }
74 if (!spu_info[spu_num] && the_spu) {
75 ref = spu_get_profile_private_kref(the_spu->ctx);
76 if (ref) {
77 spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
78 kref_get(&spu_info[spu_num]->cache_ref);
79 }
80 }
81
82 ret_info = spu_info[spu_num];
83 out:
84 return ret_info;
85}
86
87
88/* Looks for cached info for the passed spu. If not found, the
89 * cached info is created for the passed spu.
90 * Returns 0 for success; otherwise, -1 for error.
91 */
92static int
93prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
94{
95 unsigned long flags;
96 struct vma_to_fileoffset_map *new_map;
97 int retval = 0;
98 struct cached_info *info;
99
100 /* We won't bother getting cache_lock here since
101 * don't do anything with the cached_info that's returned.
102 */
103 info = get_cached_info(spu, spu->number);
104
105 if (info) {
106 pr_debug("Found cached SPU info.\n");
107 goto out;
108 }
109
110 /* Create cached_info and set spu_info[spu->number] to point to it.
111 * spu->number is a system-wide value, not a per-node value.
112 */
113 info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
114 if (!info) {
115 printk(KERN_ERR "SPU_PROF: "
116 "%s, line %d: create vma_map failed\n",
117 __FUNCTION__, __LINE__);
118 retval = -ENOMEM;
119 goto err_alloc;
120 }
121 new_map = create_vma_map(spu, objectId);
122 if (!new_map) {
123 printk(KERN_ERR "SPU_PROF: "
124 "%s, line %d: create vma_map failed\n",
125 __FUNCTION__, __LINE__);
126 retval = -ENOMEM;
127 goto err_alloc;
128 }
129
130 pr_debug("Created vma_map\n");
131 info->map = new_map;
132 info->the_spu = spu;
133 kref_init(&info->cache_ref);
134 spin_lock_irqsave(&cache_lock, flags);
135 spu_info[spu->number] = info;
136 /* Increment count before passing off ref to SPUFS. */
137 kref_get(&info->cache_ref);
138
139 /* We increment the module refcount here since SPUFS is
140 * responsible for the final destruction of the cached_info,
141 * and it must be able to access the destroy_cached_info()
142 * function defined in the OProfile module. We decrement
143 * the module refcount in destroy_cached_info.
144 */
145 try_module_get(THIS_MODULE);
146 spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
147 destroy_cached_info);
148 spin_unlock_irqrestore(&cache_lock, flags);
149 goto out;
150
151err_alloc:
152 kfree(info);
153out:
154 return retval;
155}
156
157/*
158 * NOTE: The caller is responsible for locking the
159 * cache_lock prior to calling this function.
160 */
161static int release_cached_info(int spu_index)
162{
163 int index, end;
164
165 if (spu_index == RELEASE_ALL) {
166 end = num_spu_nodes;
167 index = 0;
168 } else {
169 if (spu_index >= num_spu_nodes) {
170 printk(KERN_ERR "SPU_PROF: "
171 "%s, line %d: "
172 "Invalid index %d into spu info cache\n",
173 __FUNCTION__, __LINE__, spu_index);
174 goto out;
175 }
176 end = spu_index + 1;
177 index = spu_index;
178 }
179 for (; index < end; index++) {
180 if (spu_info[index]) {
181 kref_put(&spu_info[index]->cache_ref,
182 destroy_cached_info);
183 spu_info[index] = NULL;
184 }
185 }
186
187out:
188 return 0;
189}
190
191/* The source code for fast_get_dcookie was "borrowed"
192 * from drivers/oprofile/buffer_sync.c.
193 */
194
195/* Optimisation. We can manage without taking the dcookie sem
196 * because we cannot reach this code without at least one
197 * dcookie user still being registered (namely, the reader
198 * of the event buffer).
199 */
200static inline unsigned long fast_get_dcookie(struct dentry *dentry,
201 struct vfsmount *vfsmnt)
202{
203 unsigned long cookie;
204
205 if (dentry->d_cookie)
206 return (unsigned long)dentry;
207 get_dcookie(dentry, vfsmnt, &cookie);
208 return cookie;
209}
210
211/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
212 * which corresponds loosely to "application name". Also, determine
213 * the offset for the SPU ELF object. If computed offset is
214 * non-zero, it implies an embedded SPU object; otherwise, it's a
215 * separate SPU binary, in which case we retrieve its dcookie.
216 * For the embedded case, we must determine if SPU ELF is embedded
217 * in the executable application or another file (i.e., shared lib).
218 * If embedded in a shared lib, we must get the dcookie and return
219 * that to the caller.
220 */
221static unsigned long
222get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
223 unsigned long *spu_bin_dcookie,
224 unsigned long spu_ref)
225{
226 unsigned long app_cookie = 0;
227 unsigned int my_offset = 0;
228 struct file *app = NULL;
229 struct vm_area_struct *vma;
230 struct mm_struct *mm = spu->mm;
231
232 if (!mm)
233 goto out;
234
235 down_read(&mm->mmap_sem);
236
237 for (vma = mm->mmap; vma; vma = vma->vm_next) {
238 if (!vma->vm_file)
239 continue;
240 if (!(vma->vm_flags & VM_EXECUTABLE))
241 continue;
242 app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
243 vma->vm_file->f_vfsmnt);
244 pr_debug("got dcookie for %s\n",
245 vma->vm_file->f_dentry->d_name.name);
246 app = vma->vm_file;
247 break;
248 }
249
250 for (vma = mm->mmap; vma; vma = vma->vm_next) {
251 if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
252 continue;
253 my_offset = spu_ref - vma->vm_start;
254 if (!vma->vm_file)
255 goto fail_no_image_cookie;
256
257 pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
258 my_offset, spu_ref,
259 vma->vm_file->f_dentry->d_name.name);
260 *offsetp = my_offset;
261 break;
262 }
263
264 *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
265 vma->vm_file->f_vfsmnt);
266 pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
267
268 up_read(&mm->mmap_sem);
269
270out:
271 return app_cookie;
272
273fail_no_image_cookie:
274 up_read(&mm->mmap_sem);
275
276 printk(KERN_ERR "SPU_PROF: "
277 "%s, line %d: Cannot find dcookie for SPU binary\n",
278 __FUNCTION__, __LINE__);
279 goto out;
280}
281
282
283
284/* This function finds or creates cached context information for the
285 * passed SPU and records SPU context information into the OProfile
286 * event buffer.
287 */
288static int process_context_switch(struct spu *spu, unsigned long objectId)
289{
290 unsigned long flags;
291 int retval;
292 unsigned int offset = 0;
293 unsigned long spu_cookie = 0, app_dcookie;
294
295 retval = prepare_cached_spu_info(spu, objectId);
296 if (retval)
297 goto out;
298
299 /* Get dcookie first because a mutex_lock is taken in that
300 * code path, so interrupts must not be disabled.
301 */
302 app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
303 if (!app_dcookie || !spu_cookie) {
304 retval = -ENOENT;
305 goto out;
306 }
307
308 /* Record context info in event buffer */
309 spin_lock_irqsave(&buffer_lock, flags);
310 add_event_entry(ESCAPE_CODE);
311 add_event_entry(SPU_CTX_SWITCH_CODE);
312 add_event_entry(spu->number);
313 add_event_entry(spu->pid);
314 add_event_entry(spu->tgid);
315 add_event_entry(app_dcookie);
316 add_event_entry(spu_cookie);
317 add_event_entry(offset);
318 spin_unlock_irqrestore(&buffer_lock, flags);
319 smp_wmb(); /* insure spu event buffer updates are written */
320 /* don't want entries intermingled... */
321out:
322 return retval;
323}
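/* The switch record emitted above therefore has a fixed layout in the
 * OProfile event buffer (one add_event_entry() per field):
 *
 *   ESCAPE_CODE, SPU_CTX_SWITCH_CODE,
 *   spu->number, spu->pid, spu->tgid,
 *   app_dcookie, spu_cookie, offset
 *
 * so that the user space tools can attribute the SPU samples that follow to
 * the right task and SPU ELF image.
 */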
324
325/*
326 * This function is invoked on either a bind_context or unbind_context.
327 * If called for an unbind_context, the val arg is 0; otherwise,
328 * it is the object-id value for the spu context.
329 * The data arg is of type 'struct spu *'.
330 */
331static int spu_active_notify(struct notifier_block *self, unsigned long val,
332 void *data)
333{
334 int retval;
335 unsigned long flags;
336 struct spu *the_spu = data;
337
338 pr_debug("SPU event notification arrived\n");
339 if (!val) {
340 spin_lock_irqsave(&cache_lock, flags);
341 retval = release_cached_info(the_spu->number);
342 spin_unlock_irqrestore(&cache_lock, flags);
343 } else {
344 retval = process_context_switch(the_spu, val);
345 }
346 return retval;
347}
348
349static struct notifier_block spu_active = {
350 .notifier_call = spu_active_notify,
351};
352
353static int number_of_online_nodes(void)
354{
355 u32 cpu; u32 tmp;
356 int nodes = 0;
357 for_each_online_cpu(cpu) {
358 tmp = cbe_cpu_to_node(cpu) + 1;
359 if (tmp > nodes)
360 nodes++;
361 }
362 return nodes;
363}
364
365/* The main purpose of this function is to synchronize
366 * OProfile with SPUFS by registering to be notified of
367 * SPU task switches.
368 *
369 * NOTE: When profiling SPUs, we must ensure that only
370 * spu_sync_start is invoked and not the generic sync_start
371 * in drivers/oprofile/oprof.c. A return value of
372 * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
373 * accomplish this.
374 */
375int spu_sync_start(void)
376{
377 int k;
378 int ret = SKIP_GENERIC_SYNC;
379 int register_ret;
380 unsigned long flags = 0;
381
382 spu_prof_num_nodes = number_of_online_nodes();
383 num_spu_nodes = spu_prof_num_nodes * 8;
384
385 spin_lock_irqsave(&buffer_lock, flags);
386 add_event_entry(ESCAPE_CODE);
387 add_event_entry(SPU_PROFILING_CODE);
388 add_event_entry(num_spu_nodes);
389 spin_unlock_irqrestore(&buffer_lock, flags);
390
391 /* Register for SPU events */
392 register_ret = spu_switch_event_register(&spu_active);
393 if (register_ret) {
394 ret = SYNC_START_ERROR;
395 goto out;
396 }
397
398 for (k = 0; k < (MAX_NUMNODES * 8); k++)
399 last_guard_val[k] = 0;
400 pr_debug("spu_sync_start -- running.\n");
401out:
402 return ret;
403}
404
405/* Record SPU program counter samples to the oprofile event buffer. */
406void spu_sync_buffer(int spu_num, unsigned int *samples,
407 int num_samples)
408{
409 unsigned long long file_offset;
410 unsigned long flags;
411 int i;
412 struct vma_to_fileoffset_map *map;
413 struct spu *the_spu;
414 unsigned long long spu_num_ll = spu_num;
415 unsigned long long spu_num_shifted = spu_num_ll << 32;
416 struct cached_info *c_info;
417
418 /* We need to obtain the cache_lock here because it's
419 * possible that after getting the cached_info, the SPU job
420 * corresponding to this cached_info may end, thus resulting
421 * in the destruction of the cached_info.
422 */
423 spin_lock_irqsave(&cache_lock, flags);
424 c_info = get_cached_info(NULL, spu_num);
425 if (!c_info) {
426 /* This legitimately happens when the SPU task ends before all
427 * samples are recorded.
428 * No big deal -- so we just drop a few samples.
429 */
430 pr_debug("SPU_PROF: No cached SPU contex "
431 "for SPU #%d. Dropping samples.\n", spu_num);
432 goto out;
433 }
434
435 map = c_info->map;
436 the_spu = c_info->the_spu;
437 spin_lock(&buffer_lock);
438 for (i = 0; i < num_samples; i++) {
439 unsigned int sample = *(samples+i);
440 int grd_val = 0;
441 file_offset = 0;
442 if (sample == 0)
443 continue;
444 file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
445
446 /* If overlays are used by this SPU application, the guard
447 * value is non-zero, indicating which overlay section is in
448 * use. We need to discard samples taken during the time
449 * period which an overlay occurs (i.e., guard value changes).
450 */
451 if (grd_val && grd_val != last_guard_val[spu_num]) {
452 last_guard_val[spu_num] = grd_val;
453 /* Drop the rest of the samples. */
454 break;
455 }
456
457 add_event_entry(file_offset | spu_num_shifted);
458 }
459 spin_unlock(&buffer_lock);
460out:
461 spin_unlock_irqrestore(&cache_lock, flags);
462}
463
464
465int spu_sync_stop(void)
466{
467 unsigned long flags = 0;
468 int ret = spu_switch_event_unregister(&spu_active);
469 if (ret) {
470 printk(KERN_ERR "SPU_PROF: "
471 "%s, line %d: spu_switch_event_unregister returned %d\n",
472 __FUNCTION__, __LINE__, ret);
473 goto out;
474 }
475
476 spin_lock_irqsave(&cache_lock, flags);
477 ret = release_cached_info(RELEASE_ALL);
478 spin_unlock_irqrestore(&cache_lock, flags);
479out:
480 pr_debug("spu_sync_stop -- done.\n");
481 return ret;
482}
483
484
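
Each sample entry written by spu_sync_buffer() packs the SPU number into the upper 32 bits and the file offset into the lower 32 bits (file_offset | spu_num_shifted). A hypothetical decode on the consumer side, for illustration only:

static inline void example_decode_spu_sample(unsigned long long entry,
					     unsigned int *spu_num,
					     unsigned int *file_offset)
{
	*spu_num     = entry >> 32;		/* spu_num_shifted = spu_num << 32 */
	*file_offset = entry & 0xffffffffULL;	/* offset within the SPU ELF image */
}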
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
new file mode 100644
index 000000000000..76ec1d16aef7
--- /dev/null
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -0,0 +1,287 @@
1/*
2 * Cell Broadband Engine OProfile Support
3 *
4 * (C) Copyright IBM Corporation 2006
5 *
6 * Author: Maynard Johnson <maynardj@us.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* The code in this source file is responsible for generating
15 * vma-to-fileOffset maps for both overlay and non-overlay SPU
16 * applications.
17 */
18
19#include <linux/mm.h>
20#include <linux/string.h>
21#include <linux/uaccess.h>
22#include <linux/elf.h>
23#include "pr_util.h"
24
25
26void vma_map_free(struct vma_to_fileoffset_map *map)
27{
28 while (map) {
29 struct vma_to_fileoffset_map *next = map->next;
30 kfree(map);
31 map = next;
32 }
33}
34
35unsigned int
36vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
37 const struct spu *aSpu, int *grd_val)
38{
39 /*
40 * Default the offset to the physical address + a flag value.
41 * Addresses of dynamically generated code can't be found in the vma
42 * map. For those addresses the flagged value will be sent on to
43 * the user space tools so they can be reported rather than just
44 * thrown away.
45 */
46 u32 offset = 0x10000000 + vma;
47 u32 ovly_grd;
48
49 for (; map; map = map->next) {
50 if (vma < map->vma || vma >= map->vma + map->size)
51 continue;
52
53 if (map->guard_ptr) {
54 ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
55 if (ovly_grd != map->guard_val)
56 continue;
57 *grd_val = ovly_grd;
58 }
59 offset = vma - map->vma + map->offset;
60 break;
61 }
62
63 return offset;
64}
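/* Worked example for the lookup above (illustrative numbers): with a map
 * entry { vma = 0x200, size = 0x100, offset = 0x4200, guard_ptr = 0 }, a
 * sampled PC of 0x280 falls inside the entry and resolves to file offset
 * 0x280 - 0x200 + 0x4200 = 0x4280.  A PC that matches no entry (for example
 * dynamically generated code at 0x3f000) comes back as 0x10000000 + 0x3f000,
 * so the user space tools can still report it rather than drop it.
 */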
65
66static struct vma_to_fileoffset_map *
67vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
68 unsigned int size, unsigned int offset, unsigned int guard_ptr,
69 unsigned int guard_val)
70{
71 struct vma_to_fileoffset_map *new =
72 kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
73 if (!new) {
74 printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
75 __FUNCTION__, __LINE__);
76 vma_map_free(map);
77 return NULL;
78 }
79
80 new->next = map;
81 new->vma = vma;
82 new->size = size;
83 new->offset = offset;
84 new->guard_ptr = guard_ptr;
85 new->guard_val = guard_val;
86
87 return new;
88}
89
90
91/* Parse SPE ELF header and generate a list of vma_maps.
92 * A pointer to the first vma_map in the generated list
93 * of vma_maps is returned. */
94struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
95 unsigned long spu_elf_start)
96{
97 static const unsigned char expected[EI_PAD] = {
98 [EI_MAG0] = ELFMAG0,
99 [EI_MAG1] = ELFMAG1,
100 [EI_MAG2] = ELFMAG2,
101 [EI_MAG3] = ELFMAG3,
102 [EI_CLASS] = ELFCLASS32,
103 [EI_DATA] = ELFDATA2MSB,
104 [EI_VERSION] = EV_CURRENT,
105 [EI_OSABI] = ELFOSABI_NONE
106 };
107
108 int grd_val;
109 struct vma_to_fileoffset_map *map = NULL;
110 struct spu_overlay_info ovly;
111 unsigned int overlay_tbl_offset = -1;
112 unsigned long phdr_start, shdr_start;
113 Elf32_Ehdr ehdr;
114 Elf32_Phdr phdr;
115 Elf32_Shdr shdr, shdr_str;
116 Elf32_Sym sym;
117 int i, j;
118 char name[32];
119
120 unsigned int ovly_table_sym = 0;
121 unsigned int ovly_buf_table_sym = 0;
122 unsigned int ovly_table_end_sym = 0;
123 unsigned int ovly_buf_table_end_sym = 0;
124 unsigned long ovly_table;
125 unsigned int n_ovlys;
126
127 /* Get and validate ELF header. */
128
129 if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
130 goto fail;
131
132 if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
133 printk(KERN_ERR "SPU_PROF: "
134 "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
135 __FUNCTION__, __LINE__);
136 goto fail;
137 }
138 if (ehdr.e_machine != EM_SPU) {
139 printk(KERN_ERR "SPU_PROF: "
140 "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
141 __FUNCTION__, __LINE__);
142 goto fail;
143 }
144 if (ehdr.e_type != ET_EXEC) {
145 printk(KERN_ERR "SPU_PROF: "
146 "%s, line %d: Unexpected e_type parsing SPU ELF\n",
147 __FUNCTION__, __LINE__);
148 goto fail;
149 }
150 phdr_start = spu_elf_start + ehdr.e_phoff;
151 shdr_start = spu_elf_start + ehdr.e_shoff;
152
153 /* Traverse program headers. */
154 for (i = 0; i < ehdr.e_phnum; i++) {
155 if (copy_from_user(&phdr,
156 (void *) (phdr_start + i * sizeof(phdr)),
157 sizeof(phdr)))
158 goto fail;
159
160 if (phdr.p_type != PT_LOAD)
161 continue;
162 if (phdr.p_flags & (1 << 27))
163 continue;
164
165 map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
166 phdr.p_offset, 0, 0);
167 if (!map)
168 goto fail;
169 }
170
171 pr_debug("SPU_PROF: Created non-overlay maps\n");
172 /* Traverse section table and search for overlay-related symbols. */
173 for (i = 0; i < ehdr.e_shnum; i++) {
174 if (copy_from_user(&shdr,
175 (void *) (shdr_start + i * sizeof(shdr)),
176 sizeof(shdr)))
177 goto fail;
178
179 if (shdr.sh_type != SHT_SYMTAB)
180 continue;
181 if (shdr.sh_entsize != sizeof (sym))
182 continue;
183
184 if (copy_from_user(&shdr_str,
185 (void *) (shdr_start + shdr.sh_link *
186 sizeof(shdr)),
187 sizeof(shdr)))
188 goto fail;
189
190 if (shdr_str.sh_type != SHT_STRTAB)
191 goto fail;
192
193 for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
194 if (copy_from_user(&sym, (void *) (spu_elf_start +
195 shdr.sh_offset + j *
196 sizeof (sym)),
197 sizeof (sym)))
198 goto fail;
199
200 if (copy_from_user(name, (void *)
201 (spu_elf_start + shdr_str.sh_offset +
202 sym.st_name),
203 20))
204 goto fail;
205
206 if (memcmp(name, "_ovly_table", 12) == 0)
207 ovly_table_sym = sym.st_value;
208 if (memcmp(name, "_ovly_buf_table", 16) == 0)
209 ovly_buf_table_sym = sym.st_value;
210 if (memcmp(name, "_ovly_table_end", 16) == 0)
211 ovly_table_end_sym = sym.st_value;
212 if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
213 ovly_buf_table_end_sym = sym.st_value;
214 }
215 }
216
217 /* If we don't have overlays, we're done. */
218 if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
219 || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
220 pr_debug("SPU_PROF: No overlay table found\n");
221 goto out;
222 } else {
223 pr_debug("SPU_PROF: Overlay table found\n");
224 }
225
226 /* The _ovly_table symbol represents a table with one entry
227 * per overlay section. The _ovly_buf_table symbol represents
228 * a table with one entry per overlay region.
229 * The struct spu_overlay_info gives the structure of the _ovly_table
230 * entries. The structure of _ovly_table_buf is simply one
231 * u32 word per entry.
232 */
233 overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
234 aSpu, &grd_val);
235 if (overlay_tbl_offset < 0) {
236 printk(KERN_ERR "SPU_PROF: "
237 "%s, line %d: Error finding SPU overlay table\n",
238 __FUNCTION__, __LINE__);
239 goto fail;
240 }
241 ovly_table = spu_elf_start + overlay_tbl_offset;
242
243 n_ovlys = (ovly_table_end_sym -
244 ovly_table_sym) / sizeof (ovly);
245
246 /* Traverse overlay table. */
247 for (i = 0; i < n_ovlys; i++) {
248 if (copy_from_user(&ovly, (void *)
249 (ovly_table + i * sizeof (ovly)),
250 sizeof (ovly)))
251 goto fail;
252
253 /* The ovly.vma/size/offset arguments are analogous to the same
254 * arguments used above for non-overlay maps. The final two
255 * args are referred to as the guard pointer and the guard
256 * value.
257 * The guard pointer is an entry in the _ovly_buf_table,
258 * computed using ovly.buf as the index into the table. Since
259 * ovly.buf values begin at '1' to reference the first (or 0th)
260 * entry in the _ovly_buf_table, the computation subtracts 1
261 * from ovly.buf.
262 * The guard value is stored in the _ovly_buf_table entry and
263 * is an index (starting at 1) back to the _ovly_table entry
264 * that is pointing at this _ovly_buf_table entry. So, for
265 * example, for an overlay scenario with one overlay segment
266 * and two overlay sections:
267 * - Section 1 points to the first entry of the
268 * _ovly_buf_table, which contains a guard value
269 * of '1', referencing the first (index=0) entry of
270 * _ovly_table.
271 * - Section 2 points to the second entry of the
272 * _ovly_buf_table, which contains a guard value
273 * of '2', referencing the second (index=1) entry of
274 * _ovly_table.
275 */
276 map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
277 ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
278 if (!map)
279 goto fail;
280 }
281 goto out;
282
283 fail:
284 map = NULL;
285 out:
286 return map;
287}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 1a7ef7e246d2..a28cce1d6c24 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
 static struct op_counter_config ctr[OP_MAX_COUNTER];
 static struct op_system_config sys;
 
+static int op_per_cpu_rc;
+
 static void op_handle_interrupt(struct pt_regs *regs)
 {
 	model->handle_interrupt(regs, ctr);
@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
 
 static void op_powerpc_cpu_setup(void *dummy)
 {
-	model->cpu_setup(ctr);
+	int ret;
+
+	ret = model->cpu_setup(ctr);
+
+	if (ret != 0)
+		op_per_cpu_rc = ret;
 }
 
 static int op_powerpc_setup(void)
 {
 	int err;
 
+	op_per_cpu_rc = 0;
+
 	/* Grab the hardware */
 	err = reserve_pmc_hardware(op_handle_interrupt);
 	if (err)
 		return err;
 
 	/* Pre-compute the values to stuff in the hardware registers. */
-	model->reg_setup(ctr, &sys, model->num_counters);
+	op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
 
-	/* Configure the registers on all cpus. */
+	if (op_per_cpu_rc)
+		goto out;
+
+	/* Configure the registers on all cpus.  If an error occurs on one
+	 * of the cpus, op_per_cpu_rc will be set to the error */
 	on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
 
-	return 0;
+out:	if (op_per_cpu_rc) {
+		/* error on setup release the performance counter hardware */
+		release_pmc_hardware();
+	}
+
+	return op_per_cpu_rc;
 }
 
 static void op_powerpc_shutdown(void)
@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
 
 static void op_powerpc_cpu_start(void *dummy)
 {
-	model->start(ctr);
+	/* If any of the cpus have return an error, set the
+	 * global flag to the error so it can be returned
+	 * to the generic OProfile caller.
+	 */
+	int ret;
+
+	ret = model->start(ctr);
+	if (ret != 0)
+		op_per_cpu_rc = ret;
 }
 
 static int op_powerpc_start(void)
 {
+	op_per_cpu_rc = 0;
+
 	if (model->global_start)
-		model->global_start(ctr);
-	if (model->start)
+		return model->global_start(ctr);
+	if (model->start) {
 		on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
-	return 0;
+		return op_per_cpu_rc;
+	}
+	return -EIO; /* No start function is defined for this
+			power architecture */
 }
 
 static inline void op_powerpc_cpu_stop(void *dummy)
@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
 	switch (cur_cpu_spec->oprofile_type) {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_CELL_NATIVE
+#ifdef CONFIG_OPROFILE_CELL
 		case PPC_OPROFILE_CELL:
 			if (firmware_has_feature(FW_FEATURE_LPAR))
 				return -ENODEV;
 			model = &op_model_cell;
+			ops->sync_start = model->sync_start;
+			ops->sync_stop = model->sync_stop;
 			break;
 #endif
 		case PPC_OPROFILE_RS64:
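
The op_per_cpu_rc logic added above exists because on_each_cpu() cannot return per-CPU results, so each CPU's setup/start callback records its failure in a shared variable that is checked afterwards. A stripped-down sketch of the same pattern, with hypothetical names:

static int example_global_rc;

static void example_percpu_setup(void *unused)
{
	int ret = 0;	/* do the per-CPU hardware setup here */

	if (ret != 0)
		example_global_rc = ret;	/* remember the failure for the caller */
}

static int example_setup_all_cpus(void)
{
	example_global_rc = 0;
	on_each_cpu(example_percpu_setup, NULL, 0, 1);	/* same call signature as in this patch */
	return example_global_rc;
}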
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
index 5d1bbaf35ccb..cc599eb8768b 100644
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
 
 /* Configures the counters on this CPU based on the global
  * settings */
-static void fsl7450_cpu_setup(struct op_counter_config *ctr)
+static int fsl7450_cpu_setup(struct op_counter_config *ctr)
 {
 	/* freeze all counters */
 	pmc_stop_ctrs();
@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
 	mtspr(SPRN_MMCR0, mmcr0_val);
 	mtspr(SPRN_MMCR1, mmcr1_val);
 	mtspr(SPRN_MMCR2, mmcr2_val);
+
+	return 0;
 }
 
 #define NUM_CTRS 6
 
 /* Configures the global settings for the countes on all CPUs. */
-static void fsl7450_reg_setup(struct op_counter_config *ctr,
+static int fsl7450_reg_setup(struct op_counter_config *ctr,
 			     struct op_system_config *sys,
 			     int num_ctrs)
 {
@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
 		| mmcr1_event6(ctr[5].event);
 
 	mmcr2_val = 0;
+
+	return 0;
 }
 
 /* Sets the counters on this CPU to the chosen values, and starts them */
-static void fsl7450_start(struct op_counter_config *ctr)
+static int fsl7450_start(struct op_counter_config *ctr)
 {
 	int i;
 
@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
 	pmc_start_ctrs();
 
 	oprofile_running = 1;
+
+	return 0;
 }
 
 /* Stop the counters on this CPU */
@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
 	/* The freeze bit was set by the interrupt. */
 	/* Clear the freeze bit, and reenable the interrupt.
 	 * The counters won't actually start until the rfi clears
-	 * the PMM bit */
+	 * the PM/M bit */
 	pmc_start_ctrs();
 }
 
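
The op_model_7450.c hunks follow the interface change made across this series: reg_setup, cpu_setup and start now return int so that per-CPU failures can propagate back through common.c. A sketch of the callback signatures this implies (the real struct op_powerpc_model lives in oprofile_impl.h and is not shown on this page; the member list below is an assumption based only on the hunks here):

struct example_op_powerpc_model {
	int  (*reg_setup)(struct op_counter_config *ctr,
			  struct op_system_config *sys, int num_counters);
	int  (*cpu_setup)(struct op_counter_config *ctr);
	int  (*start)(struct op_counter_config *ctr);
	int  (*global_start)(struct op_counter_config *ctr);
	void (*stop)(void);
	int  (*sync_start)(void);	/* new SPU profiling hooks wired up in common.c */
	int  (*sync_stop)(void);
	void (*handle_interrupt)(struct pt_regs *regs,
				 struct op_counter_config *ctr);
	int num_counters;
};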
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index c29293befba9..d928b54f3a0f 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -5,8 +5,8 @@
  *
  * Author: David Erb (djerb@us.ibm.com)
  * Modifications:
  *	   Carl Love <carll@us.ibm.com>
  *	   Maynard Johnson <maynardj@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -38,12 +38,25 @@
 
 #include "../platforms/cell/interrupt.h"
 #include "../platforms/cell/cbe_regs.h"
+#include "cell/pr_util.h"
+
+static void cell_global_stop_spu(void);
+
+/*
+ * spu_cycle_reset is the number of cycles between samples.
+ * This variable is used for SPU profiling and should ONLY be set
+ * at the beginning of cell_reg_setup; otherwise, it's read-only.
+ */
+static unsigned int spu_cycle_reset;
+
+#define NUM_SPUS_PER_NODE    8
+#define SPU_CYCLES_EVENT_NUM 2	/* event number for SPU_CYCLES */
 
 #define PPU_CYCLES_EVENT_NUM 1	/* event number for CYCLES */
 #define PPU_CYCLES_GRP_NUM   1	/* special group number for identifying
 				 * PPU_CYCLES event
 				 */
 #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
 
 #define NUM_THREADS 2		/* number of physical threads in
 				 * physical processor
@@ -51,6 +64,7 @@
 #define NUM_TRACE_BUS_WORDS 4
 #define NUM_INPUT_BUS_WORDS 2
 
+#define MAX_SPU_COUNT 0xFFFFFF	/* maximum 24 bit LFSR value */
 
 struct pmc_cntrl_data {
 	unsigned long vcntr;
@@ -62,11 +76,10 @@ struct pmc_cntrl_data {
 /*
  * ibm,cbe-perftools rtas parameters
  */
-
 struct pm_signal {
 	u16 cpu;		/* Processor to modify */
-	u16 sub_unit;		/* hw subunit this applies to (if applicable) */
+	u16 sub_unit;		/* hw subunit this applies to (if applicable)*/
 	short int signal_group;	/* Signal Group to Enable/Disable */
 	u8 bus_word;		/* Enable/Disable on this Trace/Trigger/Event
 				 * Bus Word(s) (bitmask)
 				 */
@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
 
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
 
-/* Interpetation of hdw_thread:
+/*
+ * The CELL profiling code makes rtas calls to setup the debug bus to
+ * route the performance signals.  Additionally, SPU profiling requires
+ * a second rtas call to setup the hardware to capture the SPU PCs.
+ * The EIO error value is returned if the token lookups or the rtas
+ * call fail.  The EIO error number is the best choice of the existing
+ * error numbers.  The probability of rtas related error is very low.  But
+ * by returning EIO and printing additional information to dmsg the user
+ * will know that OProfile did not start and dmesg will tell them why.
+ * OProfile does not support returning errors on Stop.  Not a huge issue
+ * since failure to reset the debug bus or stop the SPU PC collection is
+ * not a fatel issue.  Chances are if the Stop failed, Start doesn't work
+ * either.
+ */
+
+/*
+ * Interpetation of hdw_thread:
  * 0 - even virtual cpus 0, 2, 4,...
  * 1 - odd virtual cpus 1, 3, 5, ...
+ *
+ * FIXME: this is strictly wrong, we need to clean this up in a number
+ * of places. It works for now. -arnd
  */
 static u32 hdw_thread;
 
 static u32 virt_cntr_inter_mask;
 static struct timer_list timer_virt_cntr;
 
-/* pm_signal needs to be global since it is initialized in
+/*
+ * pm_signal needs to be global since it is initialized in
  * cell_reg_setup at the time when the necessary information
  * is available.
  */
 static struct pm_signal pm_signal[NR_PHYS_CTRS];
-static int pm_rtas_token;
+static int pm_rtas_token;	/* token for debug bus setup call */
+static int spu_rtas_token;	/* token for SPU cycle profiling */
 
 static u32 reset_value[NR_PHYS_CTRS];
 static int num_counters;
@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru,
 {
 	u64 paddr = __pa(address);
 
-	return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
-			 paddr >> 32, paddr & 0xffffffff, length);
+	return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
+			 passthru, paddr >> 32, paddr & 0xffffffff, length);
 }
 
 static void pm_rtas_reset_signals(u32 node)
@@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node)
 	int ret;
 	struct pm_signal pm_signal_local;
 
-	/* The debug bus is being set to the passthru disable state.
-	 * However, the FW still expects atleast one legal signal routing
-	 * entry or it will return an error on the arguments.  If we don't
-	 * supply a valid entry, we must ignore all return values.  Ignoring
-	 * all return values means we might miss an error we should be
-	 * concerned about.
+	/*
+	 * The debug bus is being set to the passthru disable state.
+	 * However, the FW still expects atleast one legal signal routing
+	 * entry or it will return an error on the arguments.  If we don't
+	 * supply a valid entry, we must ignore all return values.  Ignoring
+	 * all return values means we might miss an error we should be
+	 * concerned about.
 	 */
 
 	/* fw expects physical cpu #. */
@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node)
175 &pm_signal_local, 210 &pm_signal_local,
176 sizeof(struct pm_signal)); 211 sizeof(struct pm_signal));
177 212
178 if (ret) 213 if (unlikely(ret))
214 /*
215 * Not a fatal error. For Oprofile stop, the oprofile
216 * functions do not support returning an error for
217 * failure to stop OProfile.
218 */
179 printk(KERN_WARNING "%s: rtas returned: %d\n", 219 printk(KERN_WARNING "%s: rtas returned: %d\n",
180 __FUNCTION__, ret); 220 __FUNCTION__, ret);
181} 221}
182 222
183static void pm_rtas_activate_signals(u32 node, u32 count) 223static int pm_rtas_activate_signals(u32 node, u32 count)
184{ 224{
185 int ret; 225 int ret;
186 int i, j; 226 int i, j;
187 struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 227 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
188 228
189 /* There is no debug setup required for the cycles event. 229 /*
230 * There is no debug setup required for the cycles event.
190 * Note that only events in the same group can be used. 231 * Note that only events in the same group can be used.
191 * Otherwise, there will be conflicts in correctly routing 232 * Otherwise, there will be conflicts in correctly routing
192 * the signals on the debug bus. It is the responsibility 233 * the signals on the debug bus. It is the responsibility
@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
213 pm_signal_local, 254 pm_signal_local,
214 i * sizeof(struct pm_signal)); 255 i * sizeof(struct pm_signal));
215 256
216 if (ret) 257 if (unlikely(ret)) {
217 printk(KERN_WARNING "%s: rtas returned: %d\n", 258 printk(KERN_WARNING "%s: rtas returned: %d\n",
218 __FUNCTION__, ret); 259 __FUNCTION__, ret);
260 return -EIO;
261 }
219 } 262 }
263
264 return 0;
220} 265}
221 266
222/* 267/*
@@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
260 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 305 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
261 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 306 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
262 307
263 /* Some of the islands signal selection is based on 64 bit words. 308 /*
309 * Some of the islands signal selection is based on 64 bit words.
264 * The debug bus words are 32 bits, the input words to the performance 310 * The debug bus words are 32 bits, the input words to the performance
265 * counters are defined as 32 bits. Need to convert the 64 bit island 311 * counters are defined as 32 bits. Need to convert the 64 bit island
266 * specification to the appropriate 32 input bit and bus word for the 312 * specification to the appropriate 32 input bit and bus word for the
267 * performance counter event selection. See the CELL Performance 313 * performance counter event selection. See the CELL Performance
268 * monitoring signals manual and the Perf cntr hardware descriptions 314 * monitoring signals manual and the Perf cntr hardware descriptions
269 * for the details. 315 * for the details.
270 */ 316 */
@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
298 input_bus[j] = i; 344 input_bus[j] = i;
299 pm_regs.group_control |= 345 pm_regs.group_control |=
300 (i << (31 - i)); 346 (i << (31 - i));
347
301 break; 348 break;
302 } 349 }
303 } 350 }
@@ -309,7 +356,8 @@ out:
309 356
310static void write_pm_cntrl(int cpu) 357static void write_pm_cntrl(int cpu)
311{ 358{
312 /* Oprofile will use 32 bit counters, set bits 7:10 to 0 359 /*
360 * Oprofile will use 32 bit counters, set bits 7:10 to 0
313 * pmregs.pm_cntrl is a global 361 * pmregs.pm_cntrl is a global
314 */ 362 */
315 363
@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu)
326 if (pm_regs.pm_cntrl.freeze == 1) 374 if (pm_regs.pm_cntrl.freeze == 1)
327 val |= CBE_PM_FREEZE_ALL_CTRS; 375 val |= CBE_PM_FREEZE_ALL_CTRS;
328 376
329 /* Routine set_count_mode must be called previously to set 377 /*
378 * Routine set_count_mode must be called previously to set
330 * the count mode based on the user selection of user and kernel. 379 * the count mode based on the user selection of user and kernel.
331 */ 380 */
332 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); 381 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu)
336static inline void 385static inline void
337set_count_mode(u32 kernel, u32 user) 386set_count_mode(u32 kernel, u32 user)
338{ 387{
339 /* The user must specify user and kernel if they want them. If 388 /*
389 * The user must specify user and kernel if they want them. If
340 * neither is specified, OProfile will count in hypervisor mode. 390 * neither is specified, OProfile will count in hypervisor mode.
341 * pm_regs.pm_cntrl is a global 391 * pm_regs.pm_cntrl is a global
342 */ 392 */
@@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
364 414
365/* 415/*
366 * Oprofile is expected to collect data on all CPUs simultaneously. 416 * Oprofile is expected to collect data on all CPUs simultaneously.
367 * However, there is one set of performance counters per node. There are 417 * However, there is one set of performance counters per node. There are
368 * two hardware threads or virtual CPUs on each node. Hence, OProfile must 418 * two hardware threads or virtual CPUs on each node. Hence, OProfile must
369 * multiplex in time the performance counter collection on the two virtual 419 * multiplex in time the performance counter collection on the two virtual
370 * CPUs. The multiplexing of the performance counters is done by this 420 * CPUs. The multiplexing of the performance counters is done by this
@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
377 * pair of per-cpu arrays is used for storing the previous and next 427 * pair of per-cpu arrays is used for storing the previous and next
378 * pmc values for a given node. 428 * pmc values for a given node.
379 * NOTE: We use the per-cpu variable to improve cache performance. 429 * NOTE: We use the per-cpu variable to improve cache performance.
430 *
431 * This routine will alternate loading the virtual counters for
432 * virtual CPUs
380 */ 433 */
381static void cell_virtual_cntr(unsigned long data) 434static void cell_virtual_cntr(unsigned long data)
382{ 435{
383 /* This routine will alternate loading the virtual counters for
384 * virtual CPUs
385 */
386 int i, prev_hdw_thread, next_hdw_thread; 436 int i, prev_hdw_thread, next_hdw_thread;
387 u32 cpu; 437 u32 cpu;
388 unsigned long flags; 438 unsigned long flags;
389 439
390 /* Make sure that the interrupt_handler and 440 /*
391 * the virt counter are not both playing with 441 * Make sure that the interrupt_handler and the virt counter are
392 * the counters on the same node. 442 * not both playing with the counters on the same node.
393 */ 443 */
394 444
395 spin_lock_irqsave(&virt_cntr_lock, flags); 445 spin_lock_irqsave(&virt_cntr_lock, flags);
@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data)
400 hdw_thread = 1 ^ hdw_thread; 450 hdw_thread = 1 ^ hdw_thread;
401 next_hdw_thread = hdw_thread; 451 next_hdw_thread = hdw_thread;
402 452
403 for (i = 0; i < num_counters; i++) 453 /*
404 /* There are some per thread events. Must do the 454 * There are some per thread events. Must do the
405 * set event, for the thread that is being started 455 * set event, for the thread that is being started
406 */ 456 */
457 for (i = 0; i < num_counters; i++)
407 set_pm_event(i, 458 set_pm_event(i,
408 pmc_cntrl[next_hdw_thread][i].evnts, 459 pmc_cntrl[next_hdw_thread][i].evnts,
409 pmc_cntrl[next_hdw_thread][i].masks); 460 pmc_cntrl[next_hdw_thread][i].masks);
410 461
411 /* The following is done only once per each node, but 462 /*
463 * The following is done only once per each node, but
412 * we need cpu #, not node #, to pass to the cbe_xxx functions. 464 * we need cpu #, not node #, to pass to the cbe_xxx functions.
413 */ 465 */
414 for_each_online_cpu(cpu) { 466 for_each_online_cpu(cpu) {
415 if (cbe_get_hw_thread_id(cpu)) 467 if (cbe_get_hw_thread_id(cpu))
416 continue; 468 continue;
417 469
418 /* stop counters, save counter values, restore counts 470 /*
471 * stop counters, save counter values, restore counts
419 * for previous thread 472 * for previous thread
420 */ 473 */
421 cbe_disable_pm(cpu); 474 cbe_disable_pm(cpu);
@@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data)
428 == 0xFFFFFFFF) 481 == 0xFFFFFFFF)
429 /* If the cntr value is 0xffffffff, we must 482 /* If the cntr value is 0xffffffff, we must
430 * reset that to 0xfffffff0 when the current 483 * reset that to 0xfffffff0 when the current
431 * thread is restarted. This will generate a 484 * thread is restarted. This will generate a
432 * new interrupt and make sure that we never 485 * new interrupt and make sure that we never
433 * restore the counters to the max value. If 486 * restore the counters to the max value. If
434 * the counters were restored to the max value, 487 * the counters were restored to the max value,
@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data)
444 next_hdw_thread)[i]); 497 next_hdw_thread)[i]);
445 } 498 }
446 499
447 /* Switch to the other thread. Change the interrupt 500 /*
501 * Switch to the other thread. Change the interrupt
448 * and control regs to be scheduled on the CPU 502 * and control regs to be scheduled on the CPU
449 * corresponding to the thread to execute. 503 * corresponding to the thread to execute.
450 */ 504 */
451 for (i = 0; i < num_counters; i++) { 505 for (i = 0; i < num_counters; i++) {
452 if (pmc_cntrl[next_hdw_thread][i].enabled) { 506 if (pmc_cntrl[next_hdw_thread][i].enabled) {
453 /* There are some per thread events. 507 /*
508 * There are some per thread events.
454 * Must do the set event, enable_cntr 509 * Must do the set event, enable_cntr
455 * for each cpu. 510 * for each cpu.
456 */ 511 */
@@ -482,17 +537,42 @@ static void start_virt_cntrs(void)
482} 537}
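The body of start_virt_cntrs() is not shown in this hunk; judging from the surrounding code it arms timer_virt_cntr so that cell_virtual_cntr() runs periodically and flips hdw_thread between the two hardware threads. A rough userspace simulation of that alternation (the tick count is arbitrary and no kernel timer API is used):

#include <stdio.h>

#define NUM_TICKS 6

int main(void)
{
	unsigned int hdw_thread = 0;	/* thread whose counters are currently live */
	int tick;

	for (tick = 0; tick < NUM_TICKS; tick++) {
		unsigned int prev = hdw_thread;

		hdw_thread = 1 ^ hdw_thread;	/* same toggle as cell_virtual_cntr() */
		printf("tick %d: save counters of thread %u, restore thread %u\n",
		       tick, prev, hdw_thread);
	}
	return 0;
}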
483 538
484/* This function is called once for all cpus combined */ 539/* This function is called once for all cpus combined */
485static void 540static int cell_reg_setup(struct op_counter_config *ctr,
486cell_reg_setup(struct op_counter_config *ctr, 541 struct op_system_config *sys, int num_ctrs)
487 struct op_system_config *sys, int num_ctrs)
488{ 542{
489 int i, j, cpu; 543 int i, j, cpu;
544 spu_cycle_reset = 0;
545
546 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
547 spu_cycle_reset = ctr[0].count;
548
549 /*
550 * Each node will need to make the rtas call to start
551 * and stop SPU profiling. Get the token once and store it.
552 */
553 spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
554
555 if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
556 printk(KERN_ERR
557 "%s: rtas token ibm,cbe-spu-perftools unknown\n",
558 __FUNCTION__);
559 return -EIO;
560 }
561 }
490 562
491 pm_rtas_token = rtas_token("ibm,cbe-perftools"); 563 pm_rtas_token = rtas_token("ibm,cbe-perftools");
492 if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { 564
493 printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", 565 /*
566 * For all events except PPU CYCLEs, each node will need to make
567 * the rtas cbe-perftools call to setup and reset the debug bus.
568 * Make the token lookup call once and store it in the global
569 * variable pm_rtas_token.
570 */
571 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
572 printk(KERN_ERR
573 "%s: rtas token ibm,cbe-perftools unknown\n",
494 __FUNCTION__); 574 __FUNCTION__);
495 goto out; 575 return -EIO;
496 } 576 }
497 577
498 num_counters = num_ctrs; 578 num_counters = num_ctrs;
@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr,
520 per_cpu(pmc_values, j)[i] = 0; 600 per_cpu(pmc_values, j)[i] = 0;
521 } 601 }
522 602
523 /* Setup the thread 1 events, map the thread 0 event to the 603 /*
604 * Setup the thread 1 events, map the thread 0 event to the
524 * equivalent thread 1 event. 605 * equivalent thread 1 event.
525 */ 606 */
526 for (i = 0; i < num_ctrs; ++i) { 607 for (i = 0; i < num_ctrs; ++i) {
@@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr,
544 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 625 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
545 input_bus[i] = 0xff; 626 input_bus[i] = 0xff;
546 627
547 /* Our counters count up, and "count" refers to 628 /*
629 * Our counters count up, and "count" refers to
548 * how much before the next interrupt, and we interrupt 630 * how much before the next interrupt, and we interrupt
549 * on overflow. So we calculate the starting value 631 * on overflow. So we calculate the starting value
550 * which will give us "count" until overflow. 632 * which will give us "count" until overflow.
551 * Then we set the events on the enabled counters. 633 * Then we set the events on the enabled counters.
552 */ 634 */
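To make the "count until overflow" arithmetic concrete: with a 32-bit counter that counts up and interrupts on overflow, the starting value is the counter maximum minus the requested count. A standalone sketch (the exact expression the driver uses is outside this hunk, so treat the constant as an assumption):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t count = 100000;			/* events requested between samples */
	uint32_t reset_value = 0xffffffff - count;	/* counter starts here and counts up */

	/*
	 * After 'count' events the counter reaches 0xffffffff; the next
	 * event overflows and raises the interrupt.
	 */
	printf("reset_value = 0x%08x, events to reach max = %u\n",
	       reset_value, 0xffffffffu - reset_value);
	return 0;
}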
@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr,
569 for (i = 0; i < num_counters; ++i) { 651 for (i = 0; i < num_counters; ++i) {
570 per_cpu(pmc_values, cpu)[i] = reset_value[i]; 652 per_cpu(pmc_values, cpu)[i] = reset_value[i];
571 } 653 }
572out: 654
573 ; 655 return 0;
574} 656}
575 657
658
659
576/* This function is called once for each cpu */ 660/* This function is called once for each cpu */
577static void cell_cpu_setup(struct op_counter_config *cntr) 661static int cell_cpu_setup(struct op_counter_config *cntr)
578{ 662{
579 u32 cpu = smp_processor_id(); 663 u32 cpu = smp_processor_id();
580 u32 num_enabled = 0; 664 u32 num_enabled = 0;
581 int i; 665 int i;
582 666
667 if (spu_cycle_reset)
668 return 0;
669
583 /* There is one performance monitor per processor chip (i.e. node), 670 /* There is one performance monitor per processor chip (i.e. node),
584 * so we only need to perform this function once per node. 671 * so we only need to perform this function once per node.
585 */ 672 */
586 if (cbe_get_hw_thread_id(cpu)) 673 if (cbe_get_hw_thread_id(cpu))
587 goto out; 674 return 0;
588
589 if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
590 printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
591 __FUNCTION__);
592 goto out;
593 }
594 675
595 /* Stop all counters */ 676 /* Stop all counters */
596 cbe_disable_pm(cpu); 677 cbe_disable_pm(cpu);
@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
609 } 690 }
610 } 691 }
611 692
612 pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 693 /*
694 * The pm_rtas_activate_signals will return -EIO if the FW
695 * call failed.
696 */
697 return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
698}
699
700#define ENTRIES 303
701#define MAXLFSR 0xFFFFFF
702
703/* precomputed table of 24 bit LFSR values */
704static int initial_lfsr[] = {
705 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
706 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
707 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
708 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
709 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
710 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
711 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
712 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
713 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
714 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
715 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
716 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
717 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
718 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
719 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
720 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
721 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
722 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
723 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
724 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
725 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
726 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
727 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
728 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
729 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
730 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
731 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
732 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
733 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
734 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
735 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
736 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
737 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
738 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
739 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
740 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
741 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
742 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
743};
744
745/*
746 * The hardware uses an LFSR counting sequence to determine when to capture
747 * the SPU PCs. An LFSR sequence is like a pseudo-random number sequence
748 * where each number occurs once in the sequence but the sequence is not in
749 * numerical order. The SPU PC capture is done when the LFSR sequence reaches
750 * the last value in the sequence. Hence the user specified value N
751 * corresponds to the LFSR number that is N from the end of the sequence.
752 *
753 * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
754 * LFSR sequence is broken into four ranges. The spacing of the precomputed
755 * values is adjusted in each range so the error between the user specified
756 * number (N) of events between samples and the actual number of events based
757 * on the precomputed value will be less than about 6.2%. Note, if the user
758 * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
759 * This is to prevent the loss of samples because the trace buffer is full.
760 *
761 * User specified N                  Step between          Index in
762 *                                   precomputed values    precomputed
763 *                                                         table
764 * 0 to 2^16-1                       ----                  0
765 * 2^16 to 2^16+2^19-1               2^12                  1 to 128
766 * 2^16+2^19 to 2^16+2^19+2^22-1     2^15                  129 to 256
767 * 2^16+2^19+2^22 to 2^24-1          2^18                  257 to 302
768 *
769 *
770 * For example, the LFSR values in the second range are computed for 2^16,
771 * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
772 * 1, 2,..., 127, 128.
773 *
774 * The 24 bit LFSR value for the nth number in the sequence can be
775 * calculated using the following code:
776 *
777 * #define size 24
778 * int calculate_lfsr(int n)
779 * {
780 * int i;
781 * unsigned int newlfsr0;
782 * unsigned int lfsr = 0xFFFFFF;
783 * unsigned int howmany = n;
784 *
785 * for (i = 2; i < howmany + 2; i++) {
786 * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
787 * ((lfsr >> (size - 1 - 1)) & 1) ^
788 * (((lfsr >> (size - 1 - 6)) & 1) ^
789 * ((lfsr >> (size - 1 - 23)) & 1)));
790 *
791 * lfsr >>= 1;
792 * lfsr = lfsr | (newlfsr0 << (size - 1));
793 * }
794 * return lfsr;
795 * }
796 */
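The generator shown in the comment can be lifted into a small userspace harness to regenerate or spot-check values; a sketch follows (the loop body is copied from the comment above and renamed calculate_lfsr_sw to avoid confusion with the table-based helper below; main() is only scaffolding, and matching entries of initial_lfsr[] still requires honouring the "N from the end of the sequence" convention described above):

#include <stdio.h>

#define size 24

/* 24-bit LFSR stepped n times, exactly as in the comment above. */
static int calculate_lfsr_sw(int n)
{
	int i;
	unsigned int newlfsr0;
	unsigned int lfsr = 0xFFFFFF;
	unsigned int howmany = n;

	for (i = 2; i < howmany + 2; i++) {
		newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
			    ((lfsr >> (size - 1 - 1)) & 1) ^
			    (((lfsr >> (size - 1 - 6)) & 1) ^
			     ((lfsr >> (size - 1 - 23)) & 1)));

		lfsr >>= 1;
		lfsr = lfsr | (newlfsr0 << (size - 1));
	}
	return lfsr;
}

int main(void)
{
	int n;

	for (n = 1; n <= 4; n++)
		printf("lfsr after %d step(s): 0x%06x\n", n, calculate_lfsr_sw(n));
	return 0;
}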
797
798#define V2_16 (0x1 << 16)
799#define V2_19 (0x1 << 19)
800#define V2_22 (0x1 << 22)
801
802static int calculate_lfsr(int n)
803{
804 /*
805 * The ranges and steps are in powers of 2 so the calculations
806 * can be done using shifts rather than divide.
807 */
808 int index;
809
810 if ((n >> 16) == 0)
811 index = 0;
812 else if (((n - V2_16) >> 19) == 0)
813 index = ((n - V2_16) >> 12) + 1;
814 else if (((n - V2_16 - V2_19) >> 22) == 0)
815 index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
816 else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
817 index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
818 else
819 index = ENTRIES-1;
820
821 /* make sure index is valid */
822 if ((index > ENTRIES) || (index < 0))
823 index = ENTRIES-1;
824
825 return initial_lfsr[index];
826}
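To see the range selection in action, here is a standalone trace of the same index arithmetic for a few representative values of N (the index logic mirrors calculate_lfsr() above; initial_lfsr[] itself is omitted, so only the chosen index is printed):

#include <stdio.h>

#define ENTRIES 303
#define V2_16 (0x1 << 16)
#define V2_19 (0x1 << 19)
#define V2_22 (0x1 << 22)

/* Same range/step selection as calculate_lfsr(), returning the table index. */
static int lfsr_table_index(int n)
{
	int index;

	if ((n >> 16) == 0)
		index = 0;
	else if (((n - V2_16) >> 19) == 0)
		index = ((n - V2_16) >> 12) + 1;
	else if (((n - V2_16 - V2_19) >> 22) == 0)
		index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;
	else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
		index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;
	else
		index = ENTRIES - 1;

	if ((index > ENTRIES) || (index < 0))
		index = ENTRIES - 1;

	return index;
}

int main(void)
{
	int samples[] = { 1000, V2_16, V2_16 + V2_19, (1 << 24) - 1 };
	unsigned int i;

	/* Expected: 0, 1, 129, 302 -- matching the ranges in the comment above. */
	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("N = %8d -> table index %d\n",
		       samples[i], lfsr_table_index(samples[i]));
	return 0;
}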
827
828static int pm_rtas_activate_spu_profiling(u32 node)
829{
830 int ret, i;
831 struct pm_signal pm_signal_local[NR_PHYS_CTRS];
832
833 /*
834 * Set up the rtas call to configure the debug bus to
835 * route the SPU PCs. Setup the pm_signal for each SPU
836 */
837 for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
838 pm_signal_local[i].cpu = node;
839 pm_signal_local[i].signal_group = 41;
840 /* spu i on word (i/2) */
841 pm_signal_local[i].bus_word = 1 << i / 2;
842 /* spu i */
843 pm_signal_local[i].sub_unit = i;
844 pm_signal_local[i].bit = 63;
845 }
846
847 ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
848 PASSTHRU_ENABLE, pm_signal_local,
849 (NUM_SPUS_PER_NODE
850 * sizeof(struct pm_signal)));
851
852 if (unlikely(ret)) {
853 printk(KERN_WARNING "%s: rtas returned: %d\n",
854 __FUNCTION__, ret);
855 return -EIO;
856 }
857
858 return 0;
859}
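The loop above routes one debug-bus signal per SPU from signal group 41, pairing SPUs two per bus word. A quick standalone illustration of the bus_word/sub_unit pattern it produces (assuming eight SPUs per node, which is what NUM_SPUS_PER_NODE suggests):

#include <stdio.h>

#define NUM_SPUS_PER_NODE 8

int main(void)
{
	int i;

	/* Mirrors the pm_signal setup: each pair of SPUs shares one bus-word bit. */
	for (i = 0; i < NUM_SPUS_PER_NODE; i++)
		printf("spu %d: bus_word = 0x%x, sub_unit = %d\n",
		       i, 1 << (i / 2), i);
	return 0;
}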
860
861#ifdef CONFIG_CPU_FREQ
862static int
863oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
864{
865 int ret = 0;
866 struct cpufreq_freqs *frq = data;
867 if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
868 (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
869 (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
870 set_spu_profiling_frequency(frq->new, spu_cycle_reset);
871 return ret;
872}
873
874static struct notifier_block cpu_freq_notifier_block = {
875 .notifier_call = oprof_cpufreq_notify
876};
877#endif
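The notifier above refreshes the SPU sampling frequency on cpufreq transitions, and the phase it reacts to depends on the direction of the change: before the clock rises, after it drops (plus unconditionally on suspend/resume). Presumably this keeps the configured value from understating the real clock during the transition window. A standalone model of just that predicate (the enum stands in for the real CPUFREQ_* constants):

#include <stdio.h>

enum phase { PRECHANGE, POSTCHANGE };	/* stand-ins for CPUFREQ_PRECHANGE/POSTCHANGE */

static int should_update(enum phase p, unsigned int old_khz, unsigned int new_khz)
{
	return (p == PRECHANGE && old_khz < new_khz) ||
	       (p == POSTCHANGE && old_khz > new_khz);
}

int main(void)
{
	printf("2.4 -> 3.2 GHz: pre=%d post=%d\n",
	       should_update(PRECHANGE, 2400000, 3200000),
	       should_update(POSTCHANGE, 2400000, 3200000));
	printf("3.2 -> 2.4 GHz: pre=%d post=%d\n",
	       should_update(PRECHANGE, 3200000, 2400000),
	       should_update(POSTCHANGE, 3200000, 2400000));
	return 0;
}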
878
879static int cell_global_start_spu(struct op_counter_config *ctr)
880{
881 int subfunc;
882 unsigned int lfsr_value;
883 int cpu;
884 int ret;
885 int rtas_error;
886 unsigned int cpu_khzfreq = 0;
887
888 /* The SPU profiling uses time-based profiling based on
889 * cpu frequency, so if configured with the CPU_FREQ
890 * option, we should detect frequency changes and react
891 * accordingly.
892 */
893#ifdef CONFIG_CPU_FREQ
894 ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
895 CPUFREQ_TRANSITION_NOTIFIER);
896 if (ret < 0)
897 /* this is not a fatal error */
898 printk(KERN_ERR "CPU freq change registration failed: %d\n",
899 ret);
900
901 else
902 cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
903#endif
904
905 set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
906
907 for_each_online_cpu(cpu) {
908 if (cbe_get_hw_thread_id(cpu))
909 continue;
910
911 /*
912 * Setup SPU cycle-based profiling.
913 * Set perf_mon_control bit 0 to a zero before
914 * enabling spu collection hardware.
915 */
916 cbe_write_pm(cpu, pm_control, 0);
917
918 if (spu_cycle_reset > MAX_SPU_COUNT)
919 /* use largest possible value */
920 lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
921 else
922 lfsr_value = calculate_lfsr(spu_cycle_reset);
923
924 /* must use a non zero value. Zero disables data collection. */
925 if (lfsr_value == 0)
926 lfsr_value = calculate_lfsr(1);
927
928 lfsr_value = lfsr_value << 8; /* shift lfsr to correct
929 * register location
930 */
931
932 /* debug bus setup */
933 ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
934
935 if (unlikely(ret)) {
936 rtas_error = ret;
937 goto out;
938 }
939
940
941 subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
942
943 /* start profiling */
944 ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
945 cbe_cpu_to_node(cpu), lfsr_value);
946
947 if (unlikely(ret != 0)) {
948 printk(KERN_ERR
949 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
950 __FUNCTION__, ret);
951 rtas_error = -EIO;
952 goto out;
953 }
954 }
955
956 rtas_error = start_spu_profiling(spu_cycle_reset);
957 if (rtas_error)
958 goto out_stop;
959
960 oprofile_running = 1;
961 return 0;
962
963out_stop:
964 cell_global_stop_spu(); /* clean up the PMU/debug bus */
613out: 965out:
614 ; 966 return rtas_error;
615} 967}
616 968
617static void cell_global_start(struct op_counter_config *ctr) 969static int cell_global_start_ppu(struct op_counter_config *ctr)
618{ 970{
619 u32 cpu; 971 u32 cpu, i;
620 u32 interrupt_mask = 0; 972 u32 interrupt_mask = 0;
621 u32 i;
622 973
623 /* This routine gets called once for the system. 974 /* This routine gets called once for the system.
624 * There is one performance monitor per node, so we 975 * There is one performance monitor per node, so we
@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr)
651 oprofile_running = 1; 1002 oprofile_running = 1;
652 smp_wmb(); 1003 smp_wmb();
653 1004
654 /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being 1005 /*
655 * executed which manipulates the PMU. We start the "virtual counter" 1006 * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
1007 * executed which manipulates the PMU. We start the "virtual counter"
656 * here so that we do not need to synchronize access to the PMU in 1008 * here so that we do not need to synchronize access to the PMU in
657 * the above for-loop. 1009 * the above for-loop.
658 */ 1010 */
659 start_virt_cntrs(); 1011 start_virt_cntrs();
1012
1013 return 0;
660} 1014}
661 1015
662static void cell_global_stop(void) 1016static int cell_global_start(struct op_counter_config *ctr)
1017{
1018 if (spu_cycle_reset)
1019 return cell_global_start_spu(ctr);
1020 else
1021 return cell_global_start_ppu(ctr);
1022}
1023
1024/*
1025 * Note the generic OProfile stop calls do not support returning
1026 * an error on stop. Hence, we will not return an error if the FW
1027 * calls fail on stop. Failure to reset the debug bus is not an issue.
1028 * Failure to disable the SPU profiling is not an issue. The FW calls
1029 * to enable the performance counters and debug bus will work even if
1030 * the hardware was not cleanly reset.
1031 */
1032static void cell_global_stop_spu(void)
1033{
1034 int subfunc, rtn_value;
1035 unsigned int lfsr_value;
1036 int cpu;
1037
1038 oprofile_running = 0;
1039
1040#ifdef CONFIG_CPU_FREQ
1041 cpufreq_unregister_notifier(&cpu_freq_notifier_block,
1042 CPUFREQ_TRANSITION_NOTIFIER);
1043#endif
1044
1045 for_each_online_cpu(cpu) {
1046 if (cbe_get_hw_thread_id(cpu))
1047 continue;
1048
1049 subfunc = 3; /*
1050 * 2 - activate SPU tracing,
1051 * 3 - deactivate
1052 */
1053 lfsr_value = 0x8f100000;
1054
1055 rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
1056 subfunc, cbe_cpu_to_node(cpu),
1057 lfsr_value);
1058
1059 if (unlikely(rtn_value != 0)) {
1060 printk(KERN_ERR
1061 "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
1062 __FUNCTION__, rtn_value);
1063 }
1064
1065 /* Deactivate the signals */
1066 pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1067 }
1068
1069 stop_spu_profiling();
1070}
1071
1072static void cell_global_stop_ppu(void)
663{ 1073{
664 int cpu; 1074 int cpu;
665 1075
666 /* This routine will be called once for the system. 1076 /*
1077 * This routine will be called once for the system.
667 * There is one performance monitor per node, so we 1078 * There is one performance monitor per node, so we
668 * only need to perform this function once per node. 1079 * only need to perform this function once per node.
669 */ 1080 */
@@ -687,8 +1098,16 @@ static void cell_global_stop(void)
687 } 1098 }
688} 1099}
689 1100
690static void 1101static void cell_global_stop(void)
691cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) 1102{
1103 if (spu_cycle_reset)
1104 cell_global_stop_spu();
1105 else
1106 cell_global_stop_ppu();
1107}
1108
1109static void cell_handle_interrupt(struct pt_regs *regs,
1110 struct op_counter_config *ctr)
692{ 1111{
693 u32 cpu; 1112 u32 cpu;
694 u64 pc; 1113 u64 pc;
@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
699 1118
700 cpu = smp_processor_id(); 1119 cpu = smp_processor_id();
701 1120
702 /* Need to make sure the interrupt handler and the virt counter 1121 /*
1122 * Need to make sure the interrupt handler and the virt counter
703 * routine are not running at the same time. See the 1123 * routine are not running at the same time. See the
704 * cell_virtual_cntr() routine for additional comments. 1124 * cell_virtual_cntr() routine for additional comments.
705 */ 1125 */
706 spin_lock_irqsave(&virt_cntr_lock, flags); 1126 spin_lock_irqsave(&virt_cntr_lock, flags);
707 1127
708 /* Need to disable and reenable the performance counters 1128 /*
1129 * Need to disable and reenable the performance counters
709 * to get the desired behavior from the hardware. This 1130 * to get the desired behavior from the hardware. This
710 * is hardware specific. 1131 * is hardware specific.
711 */ 1132 */
@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
714 1135
715 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 1136 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
716 1137
717 /* If the interrupt mask has been cleared, then the virt cntr 1138 /*
1139 * If the interrupt mask has been cleared, then the virt cntr
718 * has cleared the interrupt. When the thread that generated 1140 * has cleared the interrupt. When the thread that generated
719 * the interrupt is restored, the data count will be restored to 1141 * the interrupt is restored, the data count will be restored to
720 * 0xfffffff0 to cause the interrupt to be regenerated. 1142 * 0xfffffff0 to cause the interrupt to be regenerated.
@@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
732 } 1154 }
733 } 1155 }
734 1156
735 /* The counters were frozen by the interrupt. 1157 /*
1158 * The counters were frozen by the interrupt.
736 * Reenable the interrupt and restart the counters. 1159 * Reenable the interrupt and restart the counters.
737 * If there was a race between the interrupt handler and 1160 * If there was a race between the interrupt handler and
738 * the virtual counter routine, the virtual counter 1161 * the virtual counter routine, the virtual counter
739 * routine may have cleared the interrupts. Hence we must 1162 * routine may have cleared the interrupts. Hence we must
740 * use the virt_cntr_inter_mask to re-enable the interrupts. 1163 * use the virt_cntr_inter_mask to re-enable the interrupts.
741 */ 1164 */
742 cbe_enable_pm_interrupts(cpu, hdw_thread, 1165 cbe_enable_pm_interrupts(cpu, hdw_thread,
743 virt_cntr_inter_mask); 1166 virt_cntr_inter_mask);
744 1167
745 /* The writes to the various performance counters only writes 1168 /*
746 * to a latch. The new values (interrupt setting bits, reset 1169 * The writes to the various performance counters only writes
1170 * to a latch. The new values (interrupt setting bits, reset
747 * counter value etc.) are not copied to the actual registers 1171 * counter value etc.) are not copied to the actual registers
748 * until the performance monitor is enabled. In order to get 1172 * until the performance monitor is enabled. In order to get
749 * this to work as desired, the performance monitor needs to 1173 * this to work as desired, the performance monitor needs to
@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
755 spin_unlock_irqrestore(&virt_cntr_lock, flags); 1179 spin_unlock_irqrestore(&virt_cntr_lock, flags);
756} 1180}
757 1181
1182/*
1183 * This function is called from the generic OProfile
1184 * driver. When profiling PPUs, we need to do the
1185 * generic sync start; otherwise, do spu_sync_start.
1186 */
1187static int cell_sync_start(void)
1188{
1189 if (spu_cycle_reset)
1190 return spu_sync_start();
1191 else
1192 return DO_GENERIC_SYNC;
1193}
1194
1195static int cell_sync_stop(void)
1196{
1197 if (spu_cycle_reset)
1198 return spu_sync_stop();
1199 else
1200 return 1;
1201}
1202
758struct op_powerpc_model op_model_cell = { 1203struct op_powerpc_model op_model_cell = {
759 .reg_setup = cell_reg_setup, 1204 .reg_setup = cell_reg_setup,
760 .cpu_setup = cell_cpu_setup, 1205 .cpu_setup = cell_cpu_setup,
761 .global_start = cell_global_start, 1206 .global_start = cell_global_start,
762 .global_stop = cell_global_stop, 1207 .global_stop = cell_global_stop,
1208 .sync_start = cell_sync_start,
1209 .sync_stop = cell_sync_stop,
763 .handle_interrupt = cell_handle_interrupt, 1210 .handle_interrupt = cell_handle_interrupt,
764}; 1211};
diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c
index 2267eb8c661b..183a28bb1812 100644
--- a/arch/powerpc/oprofile/op_model_fsl_booke.c
+++ b/arch/powerpc/oprofile/op_model_fsl_booke.c
@@ -244,7 +244,7 @@ static void dump_pmcs(void)
244 mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); 244 mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
245} 245}
246 246
247static void fsl_booke_cpu_setup(struct op_counter_config *ctr) 247static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
248{ 248{
249 int i; 249 int i;
250 250
@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
258 258
259 set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); 259 set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
260 } 260 }
261
262 return 0;
261} 263}
262 264
263static void fsl_booke_reg_setup(struct op_counter_config *ctr, 265static int fsl_booke_reg_setup(struct op_counter_config *ctr,
264 struct op_system_config *sys, 266 struct op_system_config *sys,
265 int num_ctrs) 267 int num_ctrs)
266{ 268{
@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
276 for (i = 0; i < num_counters; ++i) 278 for (i = 0; i < num_counters; ++i)
277 reset_value[i] = 0x80000000UL - ctr[i].count; 279 reset_value[i] = 0x80000000UL - ctr[i].count;
278 280
281 return 0;
279} 282}
280 283
281static void fsl_booke_start(struct op_counter_config *ctr) 284static int fsl_booke_start(struct op_counter_config *ctr)
282{ 285{
283 int i; 286 int i;
284 287
@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
308 311
309 pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), 312 pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
310 mfpmr(PMRN_PMGC0)); 313 mfpmr(PMRN_PMGC0));
314
315 return 0;
311} 316}
312 317
313static void fsl_booke_stop(void) 318static void fsl_booke_stop(void)
diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c
index e8a56b0adadc..c40de461fd4e 100644
--- a/arch/powerpc/oprofile/op_model_pa6t.c
+++ b/arch/powerpc/oprofile/op_model_pa6t.c
@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
89 89
90 90
91/* precompute the values to stuff in the hardware registers */ 91/* precompute the values to stuff in the hardware registers */
92static void pa6t_reg_setup(struct op_counter_config *ctr, 92static int pa6t_reg_setup(struct op_counter_config *ctr,
93 struct op_system_config *sys, 93 struct op_system_config *sys,
94 int num_ctrs) 94 int num_ctrs)
95{ 95{
@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
135 pr_debug("reset_value for pmc%u inited to 0x%lx\n", 135 pr_debug("reset_value for pmc%u inited to 0x%lx\n",
136 pmc, reset_value[pmc]); 136 pmc, reset_value[pmc]);
137 } 137 }
138
139 return 0;
138} 140}
139 141
140/* configure registers on this cpu */ 142/* configure registers on this cpu */
141static void pa6t_cpu_setup(struct op_counter_config *ctr) 143static int pa6t_cpu_setup(struct op_counter_config *ctr)
142{ 144{
143 u64 mmcr0 = mmcr0_val; 145 u64 mmcr0 = mmcr0_val;
144 u64 mmcr1 = mmcr1_val; 146 u64 mmcr1 = mmcr1_val;
@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
154 mfspr(SPRN_PA6T_MMCR0)); 156 mfspr(SPRN_PA6T_MMCR0));
155 pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), 157 pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
156 mfspr(SPRN_PA6T_MMCR1)); 158 mfspr(SPRN_PA6T_MMCR1));
159
160 return 0;
157} 161}
158 162
159static void pa6t_start(struct op_counter_config *ctr) 163static int pa6t_start(struct op_counter_config *ctr)
160{ 164{
161 int i; 165 int i;
162 166
@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
174 oprofile_running = 1; 178 oprofile_running = 1;
175 179
176 pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); 180 pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
181
182 return 0;
177} 183}
178 184
179static void pa6t_stop(void) 185static void pa6t_stop(void)
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index a7c206b665af..cddc250a6a5c 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -32,7 +32,7 @@ static u32 mmcr0_val;
32static u64 mmcr1_val; 32static u64 mmcr1_val;
33static u64 mmcra_val; 33static u64 mmcra_val;
34 34
35static void power4_reg_setup(struct op_counter_config *ctr, 35static int power4_reg_setup(struct op_counter_config *ctr,
36 struct op_system_config *sys, 36 struct op_system_config *sys,
37 int num_ctrs) 37 int num_ctrs)
38{ 38{
@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
60 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; 60 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
61 else 61 else
62 mmcr0_val |= MMCR0_PROBLEM_DISABLE; 62 mmcr0_val |= MMCR0_PROBLEM_DISABLE;
63
64 return 0;
63} 65}
64 66
65extern void ppc64_enable_pmcs(void); 67extern void ppc64_enable_pmcs(void);
@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
84 return 0; 86 return 0;
85} 87}
86 88
87static void power4_cpu_setup(struct op_counter_config *ctr) 89static int power4_cpu_setup(struct op_counter_config *ctr)
88{ 90{
89 unsigned int mmcr0 = mmcr0_val; 91 unsigned int mmcr0 = mmcr0_val;
90 unsigned long mmcra = mmcra_val; 92 unsigned long mmcra = mmcra_val;
@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
111 mfspr(SPRN_MMCR1)); 113 mfspr(SPRN_MMCR1));
112 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), 114 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
113 mfspr(SPRN_MMCRA)); 115 mfspr(SPRN_MMCRA));
116
117 return 0;
114} 118}
115 119
116static void power4_start(struct op_counter_config *ctr) 120static int power4_start(struct op_counter_config *ctr)
117{ 121{
118 int i; 122 int i;
119 unsigned int mmcr0; 123 unsigned int mmcr0;
@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
148 oprofile_running = 1; 152 oprofile_running = 1;
149 153
150 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); 154 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
155 return 0;
151} 156}
152 157
153static void power4_stop(void) 158static void power4_stop(void)
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c
index c731acbfb2a5..a20afe45d936 100644
--- a/arch/powerpc/oprofile/op_model_rs64.c
+++ b/arch/powerpc/oprofile/op_model_rs64.c
@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
88 88
89static int num_counters; 89static int num_counters;
90 90
91static void rs64_reg_setup(struct op_counter_config *ctr, 91static int rs64_reg_setup(struct op_counter_config *ctr,
92 struct op_system_config *sys, 92 struct op_system_config *sys,
93 int num_ctrs) 93 int num_ctrs)
94{ 94{
@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
100 reset_value[i] = 0x80000000UL - ctr[i].count; 100 reset_value[i] = 0x80000000UL - ctr[i].count;
101 101
102 /* XXX setup user and kernel profiling */ 102 /* XXX setup user and kernel profiling */
103 return 0;
103} 104}
104 105
105static void rs64_cpu_setup(struct op_counter_config *ctr) 106static int rs64_cpu_setup(struct op_counter_config *ctr)
106{ 107{
107 unsigned int mmcr0; 108 unsigned int mmcr0;
108 109
@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
125 mfspr(SPRN_MMCR0)); 126 mfspr(SPRN_MMCR0));
126 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), 127 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
127 mfspr(SPRN_MMCR1)); 128 mfspr(SPRN_MMCR1));
129
130 return 0;
128} 131}
129 132
130static void rs64_start(struct op_counter_config *ctr) 133static int rs64_start(struct op_counter_config *ctr)
131{ 134{
132 int i; 135 int i;
133 unsigned int mmcr0; 136 unsigned int mmcr0;
@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
155 mtspr(SPRN_MMCR0, mmcr0); 158 mtspr(SPRN_MMCR0, mmcr0);
156 159
157 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); 160 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
161 return 0;
158} 162}
159 163
160static void rs64_stop(void) 164static void rs64_stop(void)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 33545d352e92..932538a93c2b 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -272,4 +272,14 @@ config CPM2
272 you wish to build a kernel for a machine with a CPM2 coprocessor 272 you wish to build a kernel for a machine with a CPM2 coprocessor
273 on it (826x, 827x, 8560). 273 on it (826x, 827x, 8560).
274 274
275config AXON_RAM
276 tristate "Axon DDR2 memory device driver"
277 depends on PPC_IBM_CELL_BLADE
278 default m
279 help
280 It registers one block device per Axon's DDR2 memory bank found
281 on a system. Block devices are called axonram?, their major and
282 minor numbers are available in /proc/devices, /proc/partitions or
283 in /sys/block/axonram?/dev.
284
275endmenu 285endmenu
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 9b2b386ccf48..ac8032034fb8 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -73,4 +73,14 @@ config CBE_CPUFREQ
73 For details, take a look at <file:Documentation/cpu-freq/>. 73 For details, take a look at <file:Documentation/cpu-freq/>.
74 If you don't have such processor, say N 74 If you don't have such processor, say N
75 75
76config CBE_CPUFREQ_PMI
77 tristate "CBE frequency scaling using PMI interface"
78 depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL
79 default n
80 help
81 Select this, if you want to use the PMI interface
82 to switch frequencies. Using PMI, the
83 processor will not only be able to run at lower speed,
84 but also at lower core voltage.
85
76endmenu 86endmenu
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 869af89df6ff..f88a7c76f296 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \
4obj-$(CONFIG_CBE_RAS) += ras.o 4obj-$(CONFIG_CBE_RAS) += ras.o
5 5
6obj-$(CONFIG_CBE_THERM) += cbe_thermal.o 6obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
7obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o 7obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
8obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
9cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
8 10
9ifeq ($(CONFIG_SMP),y) 11ifeq ($(CONFIG_SMP),y)
10obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o 12obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
@@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
23 $(spu-priv1-y) \ 25 $(spu-priv1-y) \
24 $(spu-manage-y) \ 26 $(spu-manage-y) \
25 spufs/ 27 spufs/
28
29obj-$(CONFIG_PCI_MSI) += axon_msi.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 000000000000..4c9ab5b70bae
--- /dev/null
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,445 @@
1/*
2 * Copyright 2007, Michael Ellerman, IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10
11#include <linux/interrupt.h>
12#include <linux/irq.h>
13#include <linux/kernel.h>
14#include <linux/pci.h>
15#include <linux/msi.h>
16#include <linux/reboot.h>
17
18#include <asm/dcr.h>
19#include <asm/machdep.h>
20#include <asm/prom.h>
21
22
23/*
24 * MSIC registers, specified as offsets from dcr_base
25 */
26#define MSIC_CTRL_REG 0x0
27
28/* Base Address registers specify FIFO location in BE memory */
29#define MSIC_BASE_ADDR_HI_REG 0x3
30#define MSIC_BASE_ADDR_LO_REG 0x4
31
32/* Hold the read/write offsets into the FIFO */
33#define MSIC_READ_OFFSET_REG 0x5
34#define MSIC_WRITE_OFFSET_REG 0x6
35
36
37/* MSIC control register flags */
38#define MSIC_CTRL_ENABLE 0x0001
39#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
40#define MSIC_CTRL_IRQ_ENABLE 0x0008
41#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
42
43/*
44 * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
45 * Currently we're using a 64KB FIFO size.
46 */
47#define MSIC_FIFO_SIZE_SHIFT 16
48#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT)
49
50/*
51 * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
52 * 8-9 of the MSIC control reg.
53 */
54#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
55
56/*
57 * We need to mask the read/write offsets to make sure they stay within
58 * the bounds of the FIFO. Also they should always be 16-byte aligned.
59 */
60#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
61
62/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
63#define MSIC_FIFO_ENTRY_SIZE 0x10
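Two of the constants above are derived values: MSIC_CTRL_FIFO_SIZE packs (n - 15) into bits 8-9 for a 2^n-byte FIFO, and MSIC_FIFO_SIZE_MASK keeps offsets inside the FIFO while preserving 16-byte alignment. A standalone check for the 64KB configuration used here:

#include <stdio.h>

#define MSIC_FIFO_SIZE_SHIFT	16	/* 64KB FIFO, as in the driver above */
#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)
#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)

int main(void)
{
	/* 16 - 15 = 1 in bits 8-9 -> 0x100 */
	printf("MSIC_CTRL_FIFO_SIZE = 0x%x\n", MSIC_CTRL_FIFO_SIZE);

	/* Offsets wrap at 64KB and stay 16-byte aligned. */
	printf("MSIC_FIFO_SIZE_MASK = 0x%x\n", MSIC_FIFO_SIZE_MASK);
	printf("0x10010 & mask      = 0x%x\n", 0x10010 & MSIC_FIFO_SIZE_MASK);
	return 0;
}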
64
65
66struct axon_msic {
67 struct device_node *dn;
68 struct irq_host *irq_host;
69 __le32 *fifo;
70 dcr_host_t dcr_host;
71 struct list_head list;
72 u32 read_offset;
73 u32 dcr_base;
74};
75
76static LIST_HEAD(axon_msic_list);
77
78static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
79{
80 pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
81
82 dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val);
83}
84
85static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n)
86{
87 return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n);
88}
89
90static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
91{
92 struct axon_msic *msic = get_irq_data(irq);
93 u32 write_offset, msi;
94 int idx;
95
96 write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG);
97 pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
98
99 /* write_offset doesn't wrap properly, so we have to mask it */
100 write_offset &= MSIC_FIFO_SIZE_MASK;
101
102 while (msic->read_offset != write_offset) {
103 idx = msic->read_offset / sizeof(__le32);
104 msi = le32_to_cpu(msic->fifo[idx]);
105 msi &= 0xFFFF;
106
107 pr_debug("axon_msi: woff %x roff %x msi %x\n",
108 write_offset, msic->read_offset, msi);
109
110 msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
111 msic->read_offset &= MSIC_FIFO_SIZE_MASK;
112
113 if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
114 generic_handle_irq(msi);
115 else
116 pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
117 }
118
119 desc->chip->eoi(irq);
120}
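The handler above consumes 16-byte FIFO entries whose first 32-bit word carries the MSI number, advancing a read offset until it catches up with the hardware write offset. A standalone model of that walk over a tiny in-memory FIFO (entry contents are invented; only the indexing mirrors the code above):

#include <stdint.h>
#include <stdio.h>

#define FIFO_SIZE_BYTES	64				/* tiny FIFO, just for the example */
#define FIFO_SIZE_MASK	((FIFO_SIZE_BYTES - 1) & ~0xFu)
#define FIFO_ENTRY_SIZE	0x10				/* 16 bytes, irq # in word 0 */

int main(void)
{
	/* Four 16-byte entries = sixteen 32-bit words; word 0 of each entry is the irq. */
	uint32_t fifo[16] = { 21, 0, 0, 0, 22, 0, 0, 0, 23, 0, 0, 0, 24, 0, 0, 0 };
	uint32_t read_offset = 0;
	uint32_t write_offset = 3 * FIFO_ENTRY_SIZE;	/* three entries pending */

	while (read_offset != write_offset) {
		uint32_t idx = read_offset / sizeof(uint32_t);

		printf("entry at offset 0x%02x: irq %u\n", read_offset, fifo[idx] & 0xFFFF);

		read_offset += FIFO_ENTRY_SIZE;
		read_offset &= FIFO_SIZE_MASK;		/* wrap inside the FIFO */
	}
	return 0;
}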
121
122static struct axon_msic *find_msi_translator(struct pci_dev *dev)
123{
124 struct irq_host *irq_host;
125 struct device_node *dn, *tmp;
126 const phandle *ph;
127 struct axon_msic *msic = NULL;
128
129 dn = pci_device_to_OF_node(dev);
130 if (!dn) {
131 dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
132 return NULL;
133 }
134
135 for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
136 ph = of_get_property(dn, "msi-translator", NULL);
137 if (ph)
138 break;
139 }
140
141 if (!ph) {
142 dev_dbg(&dev->dev,
143 "axon_msi: no msi-translator property found\n");
144 goto out_error;
145 }
146
147 tmp = dn;
148 dn = of_find_node_by_phandle(*ph);
149 if (!dn) {
150 dev_dbg(&dev->dev,
151 "axon_msi: msi-translator doesn't point to a node\n");
152 goto out_error;
153 }
154
155 irq_host = irq_find_host(dn);
156 if (!irq_host) {
157 dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n",
158 dn->full_name);
159 goto out_error;
160 }
161
162 msic = irq_host->host_data;
163
164out_error:
165 of_node_put(dn);
166 of_node_put(tmp);
167
168 return msic;
169}
170
171static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
172{
173 if (!find_msi_translator(dev))
174 return -ENODEV;
175
176 return 0;
177}
178
179static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
180{
181 struct device_node *dn, *tmp;
182 struct msi_desc *entry;
183 int len;
184 const u32 *prop;
185
186 dn = pci_device_to_OF_node(dev);
187 if (!dn) {
188 dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
189 return -ENODEV;
190 }
191
192 entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
193
194 for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
195 if (entry->msi_attrib.is_64) {
196 prop = of_get_property(dn, "msi-address-64", &len);
197 if (prop)
198 break;
199 }
200
201 prop = of_get_property(dn, "msi-address-32", &len);
202 if (prop)
203 break;
204 }
205
206 if (!prop) {
207 dev_dbg(&dev->dev,
208 "axon_msi: no msi-address-(32|64) properties found\n");
209 return -ENOENT;
210 }
211
212 switch (len) {
213 case 8:
214 msg->address_hi = prop[0];
215 msg->address_lo = prop[1];
216 break;
217 case 4:
218 msg->address_hi = 0;
219 msg->address_lo = prop[0];
220 break;
221 default:
222 dev_dbg(&dev->dev,
223 "axon_msi: malformed msi-address-(32|64) property\n");
224 of_node_put(dn);
225 return -EINVAL;
226 }
227
228 of_node_put(dn);
229
230 return 0;
231}
232
233static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
234{
235 unsigned int virq, rc;
236 struct msi_desc *entry;
237 struct msi_msg msg;
238 struct axon_msic *msic;
239
240 msic = find_msi_translator(dev);
241 if (!msic)
242 return -ENODEV;
243
244 rc = setup_msi_msg_address(dev, &msg);
245 if (rc)
246 return rc;
247
248 /* We rely on being able to stash a virq in a u16 */
249 BUILD_BUG_ON(NR_IRQS > 65536);
250
251 list_for_each_entry(entry, &dev->msi_list, list) {
252 virq = irq_create_direct_mapping(msic->irq_host);
253 if (virq == NO_IRQ) {
254 dev_warn(&dev->dev,
255 "axon_msi: virq allocation failed!\n");
256 return -1;
257 }
258 dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
259
260 set_irq_msi(virq, entry);
261 msg.data = virq;
262 write_msi_msg(virq, &msg);
263 }
264
265 return 0;
266}
267
268static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
269{
270 struct msi_desc *entry;
271
272 dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
273
274 list_for_each_entry(entry, &dev->msi_list, list) {
275 if (entry->irq == NO_IRQ)
276 continue;
277
278 set_irq_msi(entry->irq, NULL);
279 irq_dispose_mapping(entry->irq);
280 }
281}
282
283static struct irq_chip msic_irq_chip = {
284 .mask = mask_msi_irq,
285 .unmask = unmask_msi_irq,
286 .shutdown = unmask_msi_irq,
287 .typename = "AXON-MSI",
288};
289
290static int msic_host_map(struct irq_host *h, unsigned int virq,
291 irq_hw_number_t hw)
292{
293 set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
294
295 return 0;
296}
297
298static int msic_host_match(struct irq_host *host, struct device_node *dn)
299{
300 struct axon_msic *msic = host->host_data;
301
302 return msic->dn == dn;
303}
304
305static struct irq_host_ops msic_host_ops = {
306 .match = msic_host_match,
307 .map = msic_host_map,
308};
309
310static int axon_msi_notify_reboot(struct notifier_block *nb,
311 unsigned long code, void *data)
312{
313 struct axon_msic *msic;
314 u32 tmp;
315
316 list_for_each_entry(msic, &axon_msic_list, list) {
317 pr_debug("axon_msi: disabling %s\n", msic->dn->full_name);
318 tmp = msic_dcr_read(msic, MSIC_CTRL_REG);
319 tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
320 msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
321 }
322
323 return 0;
324}
325
326static struct notifier_block axon_msi_reboot_notifier = {
327 .notifier_call = axon_msi_notify_reboot
328};
329
330static int axon_msi_setup_one(struct device_node *dn)
331{
332 struct page *page;
333 struct axon_msic *msic;
334 unsigned int virq;
335 int dcr_len;
336
337 pr_debug("axon_msi: setting up dn %s\n", dn->full_name);
338
339 msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
340 if (!msic) {
341 printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
342 dn->full_name);
343 goto out;
344 }
345
346 msic->dcr_base = dcr_resource_start(dn, 0);
347 dcr_len = dcr_resource_len(dn, 0);
348
349 if (msic->dcr_base == 0 || dcr_len == 0) {
350 printk(KERN_ERR
351 "axon_msi: couldn't parse dcr properties on %s\n",
352 dn->full_name);
353 goto out;
354 }
355
356 msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len);
357 if (!DCR_MAP_OK(msic->dcr_host)) {
358 printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
359 dn->full_name);
360 goto out_free_msic;
361 }
362
363 page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL,
364 get_order(MSIC_FIFO_SIZE_BYTES));
365 if (!page) {
366 printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
367 dn->full_name);
368 goto out_free_msic;
369 }
370
371 msic->fifo = page_address(page);
372
373 msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS,
374 &msic_host_ops, 0);
375 if (!msic->irq_host) {
376 printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n",
377 dn->full_name);
378 goto out_free_fifo;
379 }
380
381 msic->irq_host->host_data = msic;
382
383 virq = irq_of_parse_and_map(dn, 0);
384 if (virq == NO_IRQ) {
385 printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
386 dn->full_name);
387 goto out_free_host;
388 }
389
390 msic->dn = of_node_get(dn);
391
392 set_irq_data(virq, msic);
393 set_irq_chained_handler(virq, axon_msi_cascade);
394 pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq);
395
396 /* Enable the MSIC hardware */
397 msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32);
398 msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
399 (u64)msic->fifo & 0xFFFFFFFF);
400 msic_dcr_write(msic, MSIC_CTRL_REG,
401 MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
402 MSIC_CTRL_FIFO_SIZE);
403
404 list_add(&msic->list, &axon_msic_list);
405
406 printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
407
408 return 0;
409
410out_free_host:
411 kfree(msic->irq_host);
412out_free_fifo:
413 __free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES));
414out_free_msic:
415 kfree(msic);
416out:
417
418 return -1;
419}
420
421static int axon_msi_init(void)
422{
423 struct device_node *dn;
424 int found = 0;
425
426 pr_debug("axon_msi: initialising ...\n");
427
428 for_each_compatible_node(dn, NULL, "ibm,axon-msic") {
429 if (axon_msi_setup_one(dn) == 0)
430 found++;
431 }
432
433 if (found) {
434 ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
435 ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
436 ppc_md.msi_check_device = axon_msi_check_device;
437
438 register_reboot_notifier(&axon_msi_reboot_notifier);
439
440 pr_debug("axon_msi: registered callbacks!\n");
441 }
442
443 return 0;
444}
445arch_initcall(axon_msi_init);
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
index ab511d5b65a4..0b6e8ee85ab1 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * cpufreq driver for the cell processor 2 * cpufreq driver for the cell processor
3 * 3 *
4 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 4 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
5 * 5 *
6 * Author: Christian Krafft <krafft@de.ibm.com> 6 * Author: Christian Krafft <krafft@de.ibm.com>
7 * 7 *
@@ -21,18 +21,11 @@
21 */ 21 */
22 22
23#include <linux/cpufreq.h> 23#include <linux/cpufreq.h>
24#include <linux/timer.h>
25
26#include <asm/hw_irq.h>
27#include <asm/io.h>
28#include <asm/machdep.h> 24#include <asm/machdep.h>
29#include <asm/processor.h>
30#include <asm/prom.h>
31#include <asm/time.h>
32#include <asm/pmi.h>
33#include <asm/of_platform.h> 25#include <asm/of_platform.h>
34 26#include <asm/prom.h>
35#include "cbe_regs.h" 27#include "cbe_regs.h"
28#include "cbe_cpufreq.h"
36 29
37static DEFINE_MUTEX(cbe_switch_mutex); 30static DEFINE_MUTEX(cbe_switch_mutex);
38 31
@@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = {
50 {0, CPUFREQ_TABLE_END}, 43 {0, CPUFREQ_TABLE_END},
51}; 44};
52 45
53/* to write to MIC register */
54static u64 MIC_Slow_Fast_Timer_table[] = {
55 [0 ... 7] = 0x007fc00000000000ull,
56};
57
58/* more values for the MIC */
59static u64 MIC_Slow_Next_Timer_table[] = {
60 0x0000240000000000ull,
61 0x0000268000000000ull,
62 0x000029C000000000ull,
63 0x00002D0000000000ull,
64 0x0000300000000000ull,
65 0x0000334000000000ull,
66 0x000039C000000000ull,
67 0x00003FC000000000ull,
68};
69
70static unsigned int pmi_frequency_limit = 0;
71/* 46/*
72 * hardware specific functions 47 * hardware specific functions
73 */ 48 */
74 49
75static struct of_device *pmi_dev; 50static int set_pmode(unsigned int cpu, unsigned int slow_mode)
76
77#ifdef CONFIG_PPC_PMI
78static int set_pmode_pmi(int cpu, unsigned int pmode)
79{
80 int ret;
81 pmi_message_t pmi_msg;
82#ifdef DEBUG
83 u64 time;
84#endif
85
86 pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
87 pmi_msg.data1 = cbe_cpu_to_node(cpu);
88 pmi_msg.data2 = pmode;
89
90#ifdef DEBUG
91 time = (u64) get_cycles();
92#endif
93
94 pmi_send_message(pmi_dev, pmi_msg);
95 ret = pmi_msg.data2;
96
97 pr_debug("PMI returned slow mode %d\n", ret);
98
99#ifdef DEBUG
100 time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */
101 time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */
102 pr_debug("had to wait %lu ns for a transition\n", time);
103#endif
104 return ret;
105}
106#endif
107
108static int get_pmode(int cpu)
109{ 51{
110 int ret; 52 int rc;
111 struct cbe_pmd_regs __iomem *pmd_regs;
112
113 pmd_regs = cbe_get_cpu_pmd_regs(cpu);
114 ret = in_be64(&pmd_regs->pmsr) & 0x07;
115
116 return ret;
117}
118
119static int set_pmode_reg(int cpu, unsigned int pmode)
120{
121 struct cbe_pmd_regs __iomem *pmd_regs;
122 struct cbe_mic_tm_regs __iomem *mic_tm_regs;
123 u64 flags;
124 u64 value;
125
126 local_irq_save(flags);
127
128 mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
129 pmd_regs = cbe_get_cpu_pmd_regs(cpu);
130
131 pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr);
132 pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0);
133
134 out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
135 out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
136
137 out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
138 out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
139
140 value = in_be64(&pmd_regs->pmcr);
141 /* set bits to zero */
142 value &= 0xFFFFFFFFFFFFFFF8ull;
143 /* set bits to next pmode */
144 value |= pmode;
145
146 out_be64(&pmd_regs->pmcr, value);
147
148 /* wait until new pmode appears in status register */
149 value = in_be64(&pmd_regs->pmsr) & 0x07;
150 while(value != pmode) {
151 cpu_relax();
152 value = in_be64(&pmd_regs->pmsr) & 0x07;
153 }
154
155 local_irq_restore(flags);
156
157 return 0;
158}
159 53
160static int set_pmode(int cpu, unsigned int slow_mode) { 54 if (cbe_cpufreq_has_pmi)
161#ifdef CONFIG_PPC_PMI 55 rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
162 if (pmi_dev)
163 return set_pmode_pmi(cpu, slow_mode);
164 else 56 else
165#endif 57 rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
166 return set_pmode_reg(cpu, slow_mode);
167}
168
169static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg)
170{
171 u8 cpu;
172 u8 cbe_pmode_new;
173
174 BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
175 58
176 cpu = cbe_node_to_cpu(pmi_msg.data1); 59 pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
177 cbe_pmode_new = pmi_msg.data2;
178 60
179 pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; 61 return rc;
180
181 pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit);
182}
183
184static int pmi_notifier(struct notifier_block *nb,
185 unsigned long event, void *data)
186{
187 struct cpufreq_policy *policy = data;
188
189 if (event != CPUFREQ_INCOMPATIBLE)
190 return 0;
191
192 cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit);
193 return 0;
194} 62}
195 63
196static struct notifier_block pmi_notifier_block = {
197 .notifier_call = pmi_notifier,
198};
199
200static struct pmi_handler cbe_pmi_handler = {
201 .type = PMI_TYPE_FREQ_CHANGE,
202 .handle_pmi_message = cbe_cpufreq_handle_pmi,
203};
204
205
206/* 64/*
207 * cpufreq functions 65 * cpufreq functions
208 */ 66 */
@@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
221 79
222 pr_debug("init cpufreq on CPU %d\n", policy->cpu); 80 pr_debug("init cpufreq on CPU %d\n", policy->cpu);
223 81
82 /*
83 * Let's check we can actually get to the CELL regs
84 */
85 if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
86 !cbe_get_cpu_mic_tm_regs(policy->cpu)) {
87 pr_info("invalid CBE regs pointers for cpufreq\n");
88 return -EINVAL;
89 }
90
224 max_freqp = of_get_property(cpu, "clock-frequency", NULL); 91 max_freqp = of_get_property(cpu, "clock-frequency", NULL);
225 92
93 of_node_put(cpu);
94
226 if (!max_freqp) 95 if (!max_freqp)
227 return -EINVAL; 96 return -EINVAL;
228 97
@@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
239 } 108 }
240 109
241 policy->governor = CPUFREQ_DEFAULT_GOVERNOR; 110 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
242 /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */ 111
112 /* if DEBUG is enabled set_pmode() measures the latency
113 * of a transition */
243 policy->cpuinfo.transition_latency = 25000; 114 policy->cpuinfo.transition_latency = 25000;
244 115
245 cur_pmode = get_pmode(policy->cpu); 116 cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
246 pr_debug("current pmode is at %d\n",cur_pmode); 117 pr_debug("current pmode is at %d\n",cur_pmode);
247 118
248 policy->cur = cbe_freqs[cur_pmode].frequency; 119 policy->cur = cbe_freqs[cur_pmode].frequency;
@@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
253 124
254 cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); 125 cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
255 126
256 if (pmi_dev) { 127 /* this ensures that policy->cpuinfo_min
257 /* frequency might get limited later, initialize limit with max_freq */ 128 * and policy->cpuinfo_max are set correctly */
258 pmi_frequency_limit = max_freq;
259 cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
260 }
261
262 /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */
263 return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); 129 return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
264} 130}
265 131
266static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) 132static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
267{ 133{
268 if (pmi_dev)
269 cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
270
271 cpufreq_frequency_table_put_attr(policy->cpu); 134 cpufreq_frequency_table_put_attr(policy->cpu);
272 return 0; 135 return 0;
273} 136}
@@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
277 return cpufreq_frequency_table_verify(policy, cbe_freqs); 140 return cpufreq_frequency_table_verify(policy, cbe_freqs);
278} 141}
279 142
280 143static int cbe_cpufreq_target(struct cpufreq_policy *policy,
281static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, 144 unsigned int target_freq,
282 unsigned int relation) 145 unsigned int relation)
283{ 146{
284 int rc; 147 int rc;
285 struct cpufreq_freqs freqs; 148 struct cpufreq_freqs freqs;
286 int cbe_pmode_new; 149 unsigned int cbe_pmode_new;
287 150
288 cpufreq_frequency_table_target(policy, 151 cpufreq_frequency_table_target(policy,
289 cbe_freqs, 152 cbe_freqs,
@@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target
298 mutex_lock(&cbe_switch_mutex); 161 mutex_lock(&cbe_switch_mutex);
299 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 162 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
300 163
301 pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", 164 pr_debug("setting frequency for cpu %d to %d kHz, " \
165 "1/%d of max frequency\n",
302 policy->cpu, 166 policy->cpu,
303 cbe_freqs[cbe_pmode_new].frequency, 167 cbe_freqs[cbe_pmode_new].frequency,
304 cbe_freqs[cbe_pmode_new].index); 168 cbe_freqs[cbe_pmode_new].index);
305 169
306 rc = set_pmode(policy->cpu, cbe_pmode_new); 170 rc = set_pmode(policy->cpu, cbe_pmode_new);
171
307 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 172 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
308 mutex_unlock(&cbe_switch_mutex); 173 mutex_unlock(&cbe_switch_mutex);
309 174
@@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = {
326 191
327static int __init cbe_cpufreq_init(void) 192static int __init cbe_cpufreq_init(void)
328{ 193{
329#ifdef CONFIG_PPC_PMI
330 struct device_node *np;
331#endif
332 if (!machine_is(cell)) 194 if (!machine_is(cell))
333 return -ENODEV; 195 return -ENODEV;
334#ifdef CONFIG_PPC_PMI
335 np = of_find_node_by_type(NULL, "ibm,pmi");
336
337 pmi_dev = of_find_device_by_node(np);
338 196
339 if (pmi_dev)
340 pmi_register_handler(pmi_dev, &cbe_pmi_handler);
341#endif
342 return cpufreq_register_driver(&cbe_cpufreq_driver); 197 return cpufreq_register_driver(&cbe_cpufreq_driver);
343} 198}
344 199
345static void __exit cbe_cpufreq_exit(void) 200static void __exit cbe_cpufreq_exit(void)
346{ 201{
347#ifdef CONFIG_PPC_PMI
348 if (pmi_dev)
349 pmi_unregister_handler(pmi_dev, &cbe_pmi_handler);
350#endif
351 cpufreq_unregister_driver(&cbe_cpufreq_driver); 202 cpufreq_unregister_driver(&cbe_cpufreq_driver);
352} 203}
353 204
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/arch/powerpc/platforms/cell/cbe_cpufreq.h
new file mode 100644
index 000000000000..c1d86bfa92ff
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.h
@@ -0,0 +1,24 @@
1/*
2 * cbe_cpufreq.h
3 *
4 * This file contains the definitions used by the cbe_cpufreq driver.
5 *
6 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
7 *
8 * Author: Christian Krafft <krafft@de.ibm.com>
9 *
10 */
11
12#include <linux/cpufreq.h>
13#include <linux/types.h>
14
15int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
16int cbe_cpufreq_get_pmode(int cpu);
17
18int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
19
20#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
21extern bool cbe_cpufreq_has_pmi;
22#else
23#define cbe_cpufreq_has_pmi (0)
24#endif
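The header above exposes cbe_cpufreq_has_pmi as a real variable only when the PMI backend is configured in; otherwise it is the constant (0), so the common driver can keep a single if (cbe_cpufreq_has_pmi) dispatch and the compiler drops the PMI branch entirely. A minimal sketch of the same pattern follows; HAVE_PMI_BACKEND and the backend function names are invented for the example, not taken from the patch.

	#include <stdio.h>

	/* #define HAVE_PMI_BACKEND 1	-- would be set when the PMI backend is built */

	#ifdef HAVE_PMI_BACKEND
	extern int backend_has_pmi;
	int set_pmode_pmi(int cpu, unsigned int pmode);
	#else
	#define backend_has_pmi (0)
	#define set_pmode_pmi(cpu, pmode) (-1)	/* dead code, never emitted */
	#endif

	static int set_pmode_mmio(int cpu, unsigned int pmode)
	{
		printf("cpu %d -> pmode %u via pervasive registers\n", cpu, pmode);
		return 0;
	}

	static int set_pmode(int cpu, unsigned int pmode)
	{
		if (backend_has_pmi)	/* folds to 'if (0)' without the PMI backend */
			return set_pmode_pmi(cpu, pmode);

		return set_pmode_mmio(cpu, pmode);
	}

	int main(void)
	{
		return set_pmode(0, 3);
	}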
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
new file mode 100644
index 000000000000..163263b3e1cd
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
@@ -0,0 +1,115 @@
1/*
2 * pervasive backend for the cbe_cpufreq driver
3 *
4 * This driver makes use of the pervasive unit to
5 * engage the desired frequency.
6 *
7 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
8 *
9 * Author: Christian Krafft <krafft@de.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2, or (at your option)
14 * any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 */
25
26#include <linux/io.h>
27#include <linux/kernel.h>
28#include <linux/time.h>
29#include <asm/machdep.h>
30#include <asm/hw_irq.h>
31
32#include "cbe_regs.h"
33#include "cbe_cpufreq.h"
34
35/* to write to MIC register */
36static u64 MIC_Slow_Fast_Timer_table[] = {
37 [0 ... 7] = 0x007fc00000000000ull,
38};
39
40/* more values for the MIC */
41static u64 MIC_Slow_Next_Timer_table[] = {
42 0x0000240000000000ull,
43 0x0000268000000000ull,
44 0x000029C000000000ull,
45 0x00002D0000000000ull,
46 0x0000300000000000ull,
47 0x0000334000000000ull,
48 0x000039C000000000ull,
49 0x00003FC000000000ull,
50};
51
52
53int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
54{
55 struct cbe_pmd_regs __iomem *pmd_regs;
56 struct cbe_mic_tm_regs __iomem *mic_tm_regs;
57 u64 flags;
58 u64 value;
59#ifdef DEBUG
60 long time;
61#endif
62
63 local_irq_save(flags);
64
65 mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
66 pmd_regs = cbe_get_cpu_pmd_regs(cpu);
67
68#ifdef DEBUG
69 time = jiffies;
70#endif
71
72 out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
73 out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
74
75 out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
76 out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
77
78 value = in_be64(&pmd_regs->pmcr);
79 /* set bits to zero */
80 value &= 0xFFFFFFFFFFFFFFF8ull;
81 /* set bits to next pmode */
82 value |= pmode;
83
84 out_be64(&pmd_regs->pmcr, value);
85
86#ifdef DEBUG
87 /* wait until new pmode appears in status register */
88 value = in_be64(&pmd_regs->pmsr) & 0x07;
89 while (value != pmode) {
90 cpu_relax();
91 value = in_be64(&pmd_regs->pmsr) & 0x07;
92 }
93
94 time = jiffies - time;
95 time = jiffies_to_msecs(time);
96 pr_debug("had to wait %lu ms for a transition using " \
97 "pervasive unit\n", time);
98#endif
99 local_irq_restore(flags);
100
101 return 0;
102}
103
104
105int cbe_cpufreq_get_pmode(int cpu)
106{
107 int ret;
108 struct cbe_pmd_regs __iomem *pmd_regs;
109
110 pmd_regs = cbe_get_cpu_pmd_regs(cpu);
111 ret = in_be64(&pmd_regs->pmsr) & 0x07;
112
113 return ret;
114}
115
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
new file mode 100644
index 000000000000..fc6f38982ff4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
@@ -0,0 +1,148 @@
1/*
2 * pmi backend for the cbe_cpufreq driver
3 *
4 * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
5 *
6 * Author: Christian Krafft <krafft@de.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */
22
23#include <linux/kernel.h>
24#include <linux/types.h>
25#include <linux/timer.h>
26#include <asm/of_platform.h>
27#include <asm/processor.h>
28#include <asm/prom.h>
29#include <asm/pmi.h>
30
31#ifdef DEBUG
32#include <asm/time.h>
33#endif
34
35#include "cbe_regs.h"
36#include "cbe_cpufreq.h"
37
38static u8 pmi_slow_mode_limit[MAX_CBE];
39
40bool cbe_cpufreq_has_pmi = false;
41EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
42
43/*
44 * hardware specific functions
45 */
46
47int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
48{
49 int ret;
50 pmi_message_t pmi_msg;
51#ifdef DEBUG
52 long time;
53#endif
54 pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
55 pmi_msg.data1 = cbe_cpu_to_node(cpu);
56 pmi_msg.data2 = pmode;
57
58#ifdef DEBUG
59 time = jiffies;
60#endif
61 pmi_send_message(pmi_msg);
62
63#ifdef DEBUG
64 time = jiffies - time;
65 time = jiffies_to_msecs(time);
66 pr_debug("had to wait %lu ms for a transition using " \
67 "PMI\n", time);
68#endif
69 ret = pmi_msg.data2;
70 pr_debug("PMI returned slow mode %d\n", ret);
71
72 return ret;
73}
74EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
75
76
77static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
78{
79 u8 node, slow_mode;
80
81 BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
82
83 node = pmi_msg.data1;
84 slow_mode = pmi_msg.data2;
85
86 pmi_slow_mode_limit[node] = slow_mode;
87
88 pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
89}
90
91static int pmi_notifier(struct notifier_block *nb,
92 unsigned long event, void *data)
93{
94 struct cpufreq_policy *policy = data;
95 struct cpufreq_frequency_table *cbe_freqs;
96 u8 node;
97
98 cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
99 node = cbe_cpu_to_node(policy->cpu);
100
101 pr_debug("got notified, event=%lu, node=%u\n", event, node);
102
103 if (pmi_slow_mode_limit[node] != 0) {
104 pr_debug("limiting node %d to slow mode %d\n",
105 node, pmi_slow_mode_limit[node]);
106
107 cpufreq_verify_within_limits(policy, 0,
108
109 cbe_freqs[pmi_slow_mode_limit[node]].frequency);
110 }
111
112 return 0;
113}
114
115static struct notifier_block pmi_notifier_block = {
116 .notifier_call = pmi_notifier,
117};
118
119static struct pmi_handler cbe_pmi_handler = {
120 .type = PMI_TYPE_FREQ_CHANGE,
121 .handle_pmi_message = cbe_cpufreq_handle_pmi,
122};
123
124
125
126static int __init cbe_cpufreq_pmi_init(void)
127{
128 cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;
129
130 if (!cbe_cpufreq_has_pmi)
131 return -ENODEV;
132
133 cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
134
135 return 0;
136}
137
138static void __exit cbe_cpufreq_pmi_exit(void)
139{
140 cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
141 pmi_unregister_handler(&cbe_pmi_handler);
142}
143
144module_init(cbe_cpufreq_pmi_init);
145module_exit(cbe_cpufreq_pmi_exit);
146
147MODULE_LICENSE("GPL");
148MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
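The PMI handler above only records the slow-mode limit that firmware reports for each node; enforcement happens in the policy notifier, which caps the policy at the frequency of that pmode. The standalone sketch below shows the clamping arithmetic with placeholder frequencies; the real table is derived from the clock-frequency property at init, so these numbers are assumptions.

	#include <stdio.h>

	#define NUM_PMODES 8

	/* pmode 0 is full speed; placeholder values, not the real QS2x table */
	static const unsigned int freq_khz[NUM_PMODES] = {
		3200000, 1600000, 1066666, 800000, 640000, 533333, 457142, 400000
	};

	static unsigned int clamp_to_pmi_limit(unsigned int policy_max_khz,
					       unsigned int slow_mode_limit)
	{
		unsigned int fw_max = freq_khz[slow_mode_limit];

		return policy_max_khz < fw_max ? policy_max_khz : fw_max;
	}

	int main(void)
	{
		/* firmware told us this node must not run faster than pmode 2 */
		printf("effective max: %u kHz\n", clamp_to_pmi_limit(3200000, 2));
		return 0;
	}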
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index 12c9674b4b1f..c8f7f0007422 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -174,6 +174,13 @@ static struct device_node *cbe_get_be_node(int cpu_id)
174 174
175 cpu_handle = of_get_property(np, "cpus", &len); 175 cpu_handle = of_get_property(np, "cpus", &len);
176 176
177 /*
 178	 * the CAB SLOF tree is non-compliant, so we just assume
179 * there is only one node
180 */
181 if (WARN_ON_ONCE(!cpu_handle))
182 return np;
183
177 for (i=0; i<len; i++) 184 for (i=0; i<len; i++)
178 if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) 185 if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
179 return np; 186 return np;
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index f370f0fa6f4c..e4132f8f51b3 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = {
292/* 292/*
293 * initialize throttling with default values 293 * initialize throttling with default values
294 */ 294 */
295static void __init init_default_values(void) 295static int __init init_default_values(void)
296{ 296{
297 int cpu; 297 int cpu;
298 struct cbe_pmd_regs __iomem *pmd_regs; 298 struct cbe_pmd_regs __iomem *pmd_regs;
@@ -339,25 +339,40 @@ static void __init init_default_values(void)
339 for_each_possible_cpu (cpu) { 339 for_each_possible_cpu (cpu) {
340 pr_debug("processing cpu %d\n", cpu); 340 pr_debug("processing cpu %d\n", cpu);
341 sysdev = get_cpu_sysdev(cpu); 341 sysdev = get_cpu_sysdev(cpu);
342
343 if (!sysdev) {
344 pr_info("invalid sysdev pointer for cbe_thermal\n");
345 return -EINVAL;
346 }
347
342 pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); 348 pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
343 349
350 if (!pmd_regs) {
351 pr_info("invalid CBE regs pointer for cbe_thermal\n");
352 return -EINVAL;
353 }
354
344 out_be64(&pmd_regs->tm_str2, str2); 355 out_be64(&pmd_regs->tm_str2, str2);
345 out_be64(&pmd_regs->tm_str1.val, str1.val); 356 out_be64(&pmd_regs->tm_str1.val, str1.val);
346 out_be64(&pmd_regs->tm_tpr.val, tpr.val); 357 out_be64(&pmd_regs->tm_tpr.val, tpr.val);
347 out_be64(&pmd_regs->tm_cr1.val, cr1.val); 358 out_be64(&pmd_regs->tm_cr1.val, cr1.val);
348 out_be64(&pmd_regs->tm_cr2, cr2); 359 out_be64(&pmd_regs->tm_cr2, cr2);
349 } 360 }
361
362 return 0;
350} 363}
351 364
352 365
353static int __init thermal_init(void) 366static int __init thermal_init(void)
354{ 367{
355 init_default_values(); 368 int rc = init_default_values();
356 369
357 spu_add_sysdev_attr_group(&spu_attribute_group); 370 if (rc == 0) {
358 cpu_add_sysdev_attr_group(&ppe_attribute_group); 371 spu_add_sysdev_attr_group(&spu_attribute_group);
372 cpu_add_sysdev_attr_group(&ppe_attribute_group);
373 }
359 374
360 return 0; 375 return rc;
361} 376}
362module_init(thermal_init); 377module_init(thermal_init);
363 378
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 96a8f609690c..90124228b8f4 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -35,18 +35,37 @@
35#include <asm/spu.h> 35#include <asm/spu.h>
36#include <asm/spu_priv1.h> 36#include <asm/spu_priv1.h>
37#include <asm/xmon.h> 37#include <asm/xmon.h>
38#include <asm/prom.h>
39#include "spu_priv1_mmio.h"
38 40
39const struct spu_management_ops *spu_management_ops; 41const struct spu_management_ops *spu_management_ops;
40EXPORT_SYMBOL_GPL(spu_management_ops); 42EXPORT_SYMBOL_GPL(spu_management_ops);
41 43
42const struct spu_priv1_ops *spu_priv1_ops; 44const struct spu_priv1_ops *spu_priv1_ops;
45EXPORT_SYMBOL_GPL(spu_priv1_ops);
43 46
44static struct list_head spu_list[MAX_NUMNODES]; 47struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
45static LIST_HEAD(spu_full_list); 48EXPORT_SYMBOL_GPL(cbe_spu_info);
46static DEFINE_MUTEX(spu_mutex);
47static DEFINE_SPINLOCK(spu_list_lock);
48 49
49EXPORT_SYMBOL_GPL(spu_priv1_ops); 50/*
51 * Protects cbe_spu_info and spu->number.
52 */
53static DEFINE_SPINLOCK(spu_lock);
54
55/*
56 * List of all spus in the system.
57 *
58 * This list is iterated by callers from irq context and callers that
59 * want to sleep. Thus modifications need to be done with both
60 * spu_full_list_lock and spu_full_list_mutex held, while iterating
61 * through it requires either of these locks.
62 *
 63 * In addition spu_full_list_lock protects all assignments to
 64 * spu->mm.
65 */
66static LIST_HEAD(spu_full_list);
67static DEFINE_SPINLOCK(spu_full_list_lock);
68static DEFINE_MUTEX(spu_full_list_mutex);
50 69
51void spu_invalidate_slbs(struct spu *spu) 70void spu_invalidate_slbs(struct spu *spu)
52{ 71{
@@ -65,12 +84,12 @@ void spu_flush_all_slbs(struct mm_struct *mm)
65 struct spu *spu; 84 struct spu *spu;
66 unsigned long flags; 85 unsigned long flags;
67 86
68 spin_lock_irqsave(&spu_list_lock, flags); 87 spin_lock_irqsave(&spu_full_list_lock, flags);
69 list_for_each_entry(spu, &spu_full_list, full_list) { 88 list_for_each_entry(spu, &spu_full_list, full_list) {
70 if (spu->mm == mm) 89 if (spu->mm == mm)
71 spu_invalidate_slbs(spu); 90 spu_invalidate_slbs(spu);
72 } 91 }
73 spin_unlock_irqrestore(&spu_list_lock, flags); 92 spin_unlock_irqrestore(&spu_full_list_lock, flags);
74} 93}
75 94
76/* The hack below stinks... try to do something better one of 95/* The hack below stinks... try to do something better one of
@@ -88,9 +107,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
88{ 107{
89 unsigned long flags; 108 unsigned long flags;
90 109
91 spin_lock_irqsave(&spu_list_lock, flags); 110 spin_lock_irqsave(&spu_full_list_lock, flags);
92 spu->mm = mm; 111 spu->mm = mm;
93 spin_unlock_irqrestore(&spu_list_lock, flags); 112 spin_unlock_irqrestore(&spu_full_list_lock, flags);
94 if (mm) 113 if (mm)
95 mm_needs_global_tlbie(mm); 114 mm_needs_global_tlbie(mm);
96} 115}
@@ -390,7 +409,7 @@ static void spu_free_irqs(struct spu *spu)
390 free_irq(spu->irqs[2], spu); 409 free_irq(spu->irqs[2], spu);
391} 410}
392 411
393static void spu_init_channels(struct spu *spu) 412void spu_init_channels(struct spu *spu)
394{ 413{
395 static const struct { 414 static const struct {
396 unsigned channel; 415 unsigned channel;
@@ -423,46 +442,7 @@ static void spu_init_channels(struct spu *spu)
423 out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); 442 out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
424 } 443 }
425} 444}
426 445EXPORT_SYMBOL_GPL(spu_init_channels);
427struct spu *spu_alloc_node(int node)
428{
429 struct spu *spu = NULL;
430
431 mutex_lock(&spu_mutex);
432 if (!list_empty(&spu_list[node])) {
433 spu = list_entry(spu_list[node].next, struct spu, list);
434 list_del_init(&spu->list);
435 pr_debug("Got SPU %d %d\n", spu->number, spu->node);
436 }
437 mutex_unlock(&spu_mutex);
438
439 if (spu)
440 spu_init_channels(spu);
441 return spu;
442}
443EXPORT_SYMBOL_GPL(spu_alloc_node);
444
445struct spu *spu_alloc(void)
446{
447 struct spu *spu = NULL;
448 int node;
449
450 for (node = 0; node < MAX_NUMNODES; node++) {
451 spu = spu_alloc_node(node);
452 if (spu)
453 break;
454 }
455
456 return spu;
457}
458
459void spu_free(struct spu *spu)
460{
461 mutex_lock(&spu_mutex);
462 list_add_tail(&spu->list, &spu_list[spu->node]);
463 mutex_unlock(&spu_mutex);
464}
465EXPORT_SYMBOL_GPL(spu_free);
466 446
467static int spu_shutdown(struct sys_device *sysdev) 447static int spu_shutdown(struct sys_device *sysdev)
468{ 448{
@@ -481,12 +461,12 @@ struct sysdev_class spu_sysdev_class = {
481int spu_add_sysdev_attr(struct sysdev_attribute *attr) 461int spu_add_sysdev_attr(struct sysdev_attribute *attr)
482{ 462{
483 struct spu *spu; 463 struct spu *spu;
484 mutex_lock(&spu_mutex);
485 464
465 mutex_lock(&spu_full_list_mutex);
486 list_for_each_entry(spu, &spu_full_list, full_list) 466 list_for_each_entry(spu, &spu_full_list, full_list)
487 sysdev_create_file(&spu->sysdev, attr); 467 sysdev_create_file(&spu->sysdev, attr);
468 mutex_unlock(&spu_full_list_mutex);
488 469
489 mutex_unlock(&spu_mutex);
490 return 0; 470 return 0;
491} 471}
492EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); 472EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
@@ -494,12 +474,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
494int spu_add_sysdev_attr_group(struct attribute_group *attrs) 474int spu_add_sysdev_attr_group(struct attribute_group *attrs)
495{ 475{
496 struct spu *spu; 476 struct spu *spu;
497 mutex_lock(&spu_mutex);
498 477
478 mutex_lock(&spu_full_list_mutex);
499 list_for_each_entry(spu, &spu_full_list, full_list) 479 list_for_each_entry(spu, &spu_full_list, full_list)
500 sysfs_create_group(&spu->sysdev.kobj, attrs); 480 sysfs_create_group(&spu->sysdev.kobj, attrs);
481 mutex_unlock(&spu_full_list_mutex);
501 482
502 mutex_unlock(&spu_mutex);
503 return 0; 483 return 0;
504} 484}
505EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); 485EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
@@ -508,24 +488,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
508void spu_remove_sysdev_attr(struct sysdev_attribute *attr) 488void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
509{ 489{
510 struct spu *spu; 490 struct spu *spu;
511 mutex_lock(&spu_mutex);
512 491
492 mutex_lock(&spu_full_list_mutex);
513 list_for_each_entry(spu, &spu_full_list, full_list) 493 list_for_each_entry(spu, &spu_full_list, full_list)
514 sysdev_remove_file(&spu->sysdev, attr); 494 sysdev_remove_file(&spu->sysdev, attr);
515 495 mutex_unlock(&spu_full_list_mutex);
516 mutex_unlock(&spu_mutex);
517} 496}
518EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); 497EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
519 498
520void spu_remove_sysdev_attr_group(struct attribute_group *attrs) 499void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
521{ 500{
522 struct spu *spu; 501 struct spu *spu;
523 mutex_lock(&spu_mutex);
524 502
503 mutex_lock(&spu_full_list_mutex);
525 list_for_each_entry(spu, &spu_full_list, full_list) 504 list_for_each_entry(spu, &spu_full_list, full_list)
526 sysfs_remove_group(&spu->sysdev.kobj, attrs); 505 sysfs_remove_group(&spu->sysdev.kobj, attrs);
527 506 mutex_unlock(&spu_full_list_mutex);
528 mutex_unlock(&spu_mutex);
529} 507}
530EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); 508EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
531 509
@@ -553,16 +531,19 @@ static int __init create_spu(void *data)
553 int ret; 531 int ret;
554 static int number; 532 static int number;
555 unsigned long flags; 533 unsigned long flags;
534 struct timespec ts;
556 535
557 ret = -ENOMEM; 536 ret = -ENOMEM;
558 spu = kzalloc(sizeof (*spu), GFP_KERNEL); 537 spu = kzalloc(sizeof (*spu), GFP_KERNEL);
559 if (!spu) 538 if (!spu)
560 goto out; 539 goto out;
561 540
541 spu->alloc_state = SPU_FREE;
542
562 spin_lock_init(&spu->register_lock); 543 spin_lock_init(&spu->register_lock);
563 mutex_lock(&spu_mutex); 544 spin_lock(&spu_lock);
564 spu->number = number++; 545 spu->number = number++;
565 mutex_unlock(&spu_mutex); 546 spin_unlock(&spu_lock);
566 547
567 ret = spu_create_spu(spu, data); 548 ret = spu_create_spu(spu, data);
568 549
@@ -579,15 +560,22 @@ static int __init create_spu(void *data)
579 if (ret) 560 if (ret)
580 goto out_free_irqs; 561 goto out_free_irqs;
581 562
582 mutex_lock(&spu_mutex); 563 mutex_lock(&cbe_spu_info[spu->node].list_mutex);
583 spin_lock_irqsave(&spu_list_lock, flags); 564 list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
584 list_add(&spu->list, &spu_list[spu->node]); 565 cbe_spu_info[spu->node].n_spus++;
566 mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
567
568 mutex_lock(&spu_full_list_mutex);
569 spin_lock_irqsave(&spu_full_list_lock, flags);
585 list_add(&spu->full_list, &spu_full_list); 570 list_add(&spu->full_list, &spu_full_list);
586 spin_unlock_irqrestore(&spu_list_lock, flags); 571 spin_unlock_irqrestore(&spu_full_list_lock, flags);
587 mutex_unlock(&spu_mutex); 572 mutex_unlock(&spu_full_list_mutex);
573
574 spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
575 ktime_get_ts(&ts);
576 spu->stats.tstamp = timespec_to_ns(&ts);
588 577
589 spu->stats.utilization_state = SPU_UTIL_IDLE; 578 INIT_LIST_HEAD(&spu->aff_list);
590 spu->stats.tstamp = jiffies;
591 579
592 goto out; 580 goto out;
593 581
@@ -608,12 +596,20 @@ static const char *spu_state_names[] = {
608static unsigned long long spu_acct_time(struct spu *spu, 596static unsigned long long spu_acct_time(struct spu *spu,
609 enum spu_utilization_state state) 597 enum spu_utilization_state state)
610{ 598{
599 struct timespec ts;
611 unsigned long long time = spu->stats.times[state]; 600 unsigned long long time = spu->stats.times[state];
612 601
613 if (spu->stats.utilization_state == state) 602 /*
614 time += jiffies - spu->stats.tstamp; 603 * If the spu is idle or the context is stopped, utilization
604 * statistics are not updated. Apply the time delta from the
605 * last recorded state of the spu.
606 */
607 if (spu->stats.util_state == state) {
608 ktime_get_ts(&ts);
609 time += timespec_to_ns(&ts) - spu->stats.tstamp;
610 }
615 611
616 return jiffies_to_msecs(time); 612 return time / NSEC_PER_MSEC;
617} 613}
618 614
619 615
@@ -623,11 +619,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
623 619
624 return sprintf(buf, "%s %llu %llu %llu %llu " 620 return sprintf(buf, "%s %llu %llu %llu %llu "
625 "%llu %llu %llu %llu %llu %llu %llu %llu\n", 621 "%llu %llu %llu %llu %llu %llu %llu %llu\n",
626 spu_state_names[spu->stats.utilization_state], 622 spu_state_names[spu->stats.util_state],
627 spu_acct_time(spu, SPU_UTIL_USER), 623 spu_acct_time(spu, SPU_UTIL_USER),
628 spu_acct_time(spu, SPU_UTIL_SYSTEM), 624 spu_acct_time(spu, SPU_UTIL_SYSTEM),
629 spu_acct_time(spu, SPU_UTIL_IOWAIT), 625 spu_acct_time(spu, SPU_UTIL_IOWAIT),
630 spu_acct_time(spu, SPU_UTIL_IDLE), 626 spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
631 spu->stats.vol_ctx_switch, 627 spu->stats.vol_ctx_switch,
632 spu->stats.invol_ctx_switch, 628 spu->stats.invol_ctx_switch,
633 spu->stats.slb_flt, 629 spu->stats.slb_flt,
@@ -640,12 +636,146 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
640 636
641static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); 637static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
642 638
639/* Hardcoded affinity idxs for QS20 */
640#define SPES_PER_BE 8
641static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 };
642static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
643
644static struct spu *spu_lookup_reg(int node, u32 reg)
645{
646 struct spu *spu;
647
648 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
649 if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg)
650 return spu;
651 }
652 return NULL;
653}
654
655static void init_aff_QS20_harcoded(void)
656{
657 int node, i;
658 struct spu *last_spu, *spu;
659 u32 reg;
660
661 for (node = 0; node < MAX_NUMNODES; node++) {
662 last_spu = NULL;
663 for (i = 0; i < SPES_PER_BE; i++) {
664 reg = QS20_reg_idxs[i];
665 spu = spu_lookup_reg(node, reg);
666 if (!spu)
667 continue;
668 spu->has_mem_affinity = QS20_reg_memory[reg];
669 if (last_spu)
670 list_add_tail(&spu->aff_list,
671 &last_spu->aff_list);
672 last_spu = spu;
673 }
674 }
675}
676
677static int of_has_vicinity(void)
678{
679 struct spu* spu;
680
681 spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list);
682 return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL;
683}
684
685static struct spu *aff_devnode_spu(int cbe, struct device_node *dn)
686{
687 struct spu *spu;
688
689 list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
690 if (spu_devnode(spu) == dn)
691 return spu;
692 return NULL;
693}
694
695static struct spu *
696aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid)
697{
698 struct spu *spu;
699 const phandle *vic_handles;
700 int lenp, i;
701
702 list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
703 if (spu_devnode(spu) == avoid)
704 continue;
705 vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp);
706 for (i=0; i < (lenp / sizeof(phandle)); i++) {
707 if (vic_handles[i] == target->linux_phandle)
708 return spu;
709 }
710 }
711 return NULL;
712}
713
714static void init_aff_fw_vicinity_node(int cbe)
715{
716 struct spu *spu, *last_spu;
717 struct device_node *vic_dn, *last_spu_dn;
718 phandle avoid_ph;
719 const phandle *vic_handles;
720 const char *name;
721 int lenp, i, added, mem_aff;
722
723 last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list);
724 avoid_ph = 0;
725 for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
726 last_spu_dn = spu_devnode(last_spu);
727 vic_handles = get_property(last_spu_dn, "vicinity", &lenp);
728
729 for (i = 0; i < (lenp / sizeof(phandle)); i++) {
730 if (vic_handles[i] == avoid_ph)
731 continue;
732
733 vic_dn = of_find_node_by_phandle(vic_handles[i]);
734 if (!vic_dn)
735 continue;
736
737 name = get_property(vic_dn, "name", NULL);
738 if (strcmp(name, "spe") == 0) {
739 spu = aff_devnode_spu(cbe, vic_dn);
740 avoid_ph = last_spu_dn->linux_phandle;
741 }
742 else {
743 mem_aff = strcmp(name, "mic-tm") == 0;
744 spu = aff_node_next_to(cbe, vic_dn, last_spu_dn);
745 if (!spu)
746 continue;
747 if (mem_aff) {
748 last_spu->has_mem_affinity = 1;
749 spu->has_mem_affinity = 1;
750 }
751 avoid_ph = vic_dn->linux_phandle;
752 }
753 list_add_tail(&spu->aff_list, &last_spu->aff_list);
754 last_spu = spu;
755 break;
756 }
757 }
758}
759
760static void init_aff_fw_vicinity(void)
761{
762 int cbe;
763
 764	/* sets has_mem_affinity for each spu, as well as the
 765	 * spu->aff_list, linking each spu to its neighbors
766 */
767 for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
768 init_aff_fw_vicinity_node(cbe);
769}
770
643static int __init init_spu_base(void) 771static int __init init_spu_base(void)
644{ 772{
645 int i, ret = 0; 773 int i, ret = 0;
646 774
647 for (i = 0; i < MAX_NUMNODES; i++) 775 for (i = 0; i < MAX_NUMNODES; i++) {
648 INIT_LIST_HEAD(&spu_list[i]); 776 mutex_init(&cbe_spu_info[i].list_mutex);
777 INIT_LIST_HEAD(&cbe_spu_info[i].spus);
778 }
649 779
650 if (!spu_management_ops) 780 if (!spu_management_ops)
651 goto out; 781 goto out;
@@ -675,16 +805,25 @@ static int __init init_spu_base(void)
675 fb_append_extra_logo(&logo_spe_clut224, ret); 805 fb_append_extra_logo(&logo_spe_clut224, ret);
676 } 806 }
677 807
808 mutex_lock(&spu_full_list_mutex);
678 xmon_register_spus(&spu_full_list); 809 xmon_register_spus(&spu_full_list);
679 810 crash_register_spus(&spu_full_list);
811 mutex_unlock(&spu_full_list_mutex);
680 spu_add_sysdev_attr(&attr_stat); 812 spu_add_sysdev_attr(&attr_stat);
681 813
814 if (of_has_vicinity()) {
815 init_aff_fw_vicinity();
816 } else {
817 long root = of_get_flat_dt_root();
818 if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
819 init_aff_QS20_harcoded();
820 }
821
682 return 0; 822 return 0;
683 823
684 out_unregister_sysdev_class: 824 out_unregister_sysdev_class:
685 sysdev_class_unregister(&spu_sysdev_class); 825 sysdev_class_unregister(&spu_sysdev_class);
686 out: 826 out:
687
688 return ret; 827 return ret;
689} 828}
690module_init(init_spu_base); 829module_init(init_spu_base);
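spu_acct_time above switches the utilization statistics from jiffies to nanoseconds taken via ktime_get_ts and reports them in milliseconds, adding the delta since the last recorded timestamp when the SPU is still in the state being queried. Below is a userspace sketch of the same accounting scheme, assuming CLOCK_MONOTONIC as the time source; it is an illustration, not kernel code.

	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	enum util_state { UTIL_USER, UTIL_SYSTEM, UTIL_IOWAIT, UTIL_IDLE_LOADED, UTIL_MAX };

	struct util_stats {
		enum util_state state;
		uint64_t tstamp_ns;		/* when 'state' was entered */
		uint64_t times_ns[UTIL_MAX];	/* accumulated time per state */
	};

	static uint64_t now_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
	}

	static uint64_t acct_time_ms(const struct util_stats *s, enum util_state state)
	{
		uint64_t t = s->times_ns[state];

		if (s->state == state)		/* still in this state: add the delta */
			t += now_ns() - s->tstamp_ns;
		return t / 1000000ULL;		/* report in milliseconds */
	}

	int main(void)
	{
		struct util_stats s = { .state = UTIL_USER, .tstamp_ns = now_ns() };

		printf("user time: %llu ms\n",
		       (unsigned long long)acct_time_ms(&s, UTIL_USER));
		return 0;
	}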
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index 261b507a901a..dd2c6688c8aa 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = {
34 * this file is not used and the syscalls directly enter the fs code */ 34 * this file is not used and the syscalls directly enter the fs code */
35 35
36asmlinkage long sys_spu_create(const char __user *name, 36asmlinkage long sys_spu_create(const char __user *name,
37 unsigned int flags, mode_t mode) 37 unsigned int flags, mode_t mode, int neighbor_fd)
38{ 38{
39 long ret; 39 long ret;
40 struct module *owner = spufs_calls.owner; 40 struct module *owner = spufs_calls.owner;
41 struct file *neighbor;
42 int fput_needed;
41 43
42 ret = -ENOSYS; 44 ret = -ENOSYS;
43 if (owner && try_module_get(owner)) { 45 if (owner && try_module_get(owner)) {
44 ret = spufs_calls.create_thread(name, flags, mode); 46 if (flags & SPU_CREATE_AFFINITY_SPU) {
47 neighbor = fget_light(neighbor_fd, &fput_needed);
48 if (neighbor) {
49 ret = spufs_calls.create_thread(name, flags,
50 mode, neighbor);
51 fput_light(neighbor, fput_needed);
52 }
53 }
54 else {
55 ret = spufs_calls.create_thread(name, flags,
56 mode, NULL);
57 }
45 module_put(owner); 58 module_put(owner);
46 } 59 }
47 return ret; 60 return ret;
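With the extended syscall above, spu_create takes a fourth argument, the file descriptor of a neighbour context, whenever SPU_CREATE_AFFINITY_SPU is set. The userspace sketch below shows how a gang with affinity hints might be created; the flag values are shown as literals from the spufs headers but should be treated as assumptions, the raw syscall is powerpc-only, and real applications would normally go through libspe rather than syscall(2).

	#include <stdio.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <unistd.h>

	#define SPU_CREATE_GANG		0x0002	/* assumed values */
	#define SPU_CREATE_AFFINITY_SPU	0x0010
	#define SPU_CREATE_AFFINITY_MEM	0x0020

	static long spu_create4(const char *path, unsigned int flags,
				mode_t mode, int neighbor_fd)
	{
		return syscall(__NR_spu_create, path, flags, mode, neighbor_fd);
	}

	int main(void)
	{
		long gang, first, second;

		gang = syscall(__NR_spu_create, "/spu/mygang", SPU_CREATE_GANG, 0755);

		/* reference context, asking for memory affinity */
		first = spu_create4("/spu/mygang/ctx0",
				    SPU_CREATE_AFFINITY_MEM, 0755, -1);

		/* place ctx1 as close as possible to ctx0 */
		second = spu_create4("/spu/mygang/ctx1",
				     SPU_CREATE_AFFINITY_SPU, 0755, (int)first);

		if (gang < 0 || first < 0 || second < 0)
			perror("spu_create");
		return 0;
	}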
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 6d7bd60f5380..6694f86d7000 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -22,6 +22,7 @@
22 22
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/module.h>
25#include <linux/slab.h> 26#include <linux/slab.h>
26#include <asm/atomic.h> 27#include <asm/atomic.h>
27#include <asm/spu.h> 28#include <asm/spu.h>
@@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
55 ctx->ops = &spu_backing_ops; 56 ctx->ops = &spu_backing_ops;
56 ctx->owner = get_task_mm(current); 57 ctx->owner = get_task_mm(current);
57 INIT_LIST_HEAD(&ctx->rq); 58 INIT_LIST_HEAD(&ctx->rq);
59 INIT_LIST_HEAD(&ctx->aff_list);
58 if (gang) 60 if (gang)
59 spu_gang_add_ctx(gang, ctx); 61 spu_gang_add_ctx(gang, ctx);
60 ctx->cpus_allowed = current->cpus_allowed; 62 ctx->cpus_allowed = current->cpus_allowed;
61 spu_set_timeslice(ctx); 63 spu_set_timeslice(ctx);
62 ctx->stats.execution_state = SPUCTX_UTIL_USER; 64 ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
63 ctx->stats.tstamp = jiffies;
64 65
65 atomic_inc(&nr_spu_contexts); 66 atomic_inc(&nr_spu_contexts);
66 goto out; 67 goto out;
@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
81 spu_fini_csa(&ctx->csa); 82 spu_fini_csa(&ctx->csa);
82 if (ctx->gang) 83 if (ctx->gang)
83 spu_gang_remove_ctx(ctx->gang, ctx); 84 spu_gang_remove_ctx(ctx->gang, ctx);
85 if (ctx->prof_priv_kref)
86 kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
84 BUG_ON(!list_empty(&ctx->rq)); 87 BUG_ON(!list_empty(&ctx->rq));
85 atomic_dec(&nr_spu_contexts); 88 atomic_dec(&nr_spu_contexts);
86 kfree(ctx); 89 kfree(ctx);
@@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
166void spu_acquire_saved(struct spu_context *ctx) 169void spu_acquire_saved(struct spu_context *ctx)
167{ 170{
168 spu_acquire(ctx); 171 spu_acquire(ctx);
169 if (ctx->state != SPU_STATE_SAVED) 172 if (ctx->state != SPU_STATE_SAVED) {
173 set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
170 spu_deactivate(ctx); 174 spu_deactivate(ctx);
175 }
176}
177
178/**
179 * spu_release_saved - unlock spu context and return it to the runqueue
180 * @ctx: context to unlock
181 */
182void spu_release_saved(struct spu_context *ctx)
183{
184 BUG_ON(ctx->state != SPU_STATE_SAVED);
185
186 if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags))
187 spu_activate(ctx, 0);
188
189 spu_release(ctx);
171} 190}
191
192void spu_set_profile_private_kref(struct spu_context *ctx,
193 struct kref *prof_info_kref,
194 void ( * prof_info_release) (struct kref *kref))
195{
196 ctx->prof_priv_kref = prof_info_kref;
197 ctx->prof_priv_release = prof_info_release;
198}
199EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
200
201void *spu_get_profile_private_kref(struct spu_context *ctx)
202{
203 return ctx->prof_priv_kref;
204}
205EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
206
207
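The two exports above let an outside module, such as the OProfile SPU support added elsewhere in this series, attach reference-counted private data to a context; destroy_spu_context drops the reference when the context goes away. Below is a sketch of how a profiler might use the hooks; the cached_info structure and its fields are invented for illustration and this is not the actual OProfile code.

	#include <linux/kernel.h>
	#include <linux/kref.h>
	#include <linux/slab.h>
	#include <asm/spu.h>

	struct cached_info {
		struct kref cache_ref;
		void *vma_map;		/* whatever the profiler caches per context */
	};

	static void destroy_cached_info(struct kref *kref)
	{
		struct cached_info *info;

		info = container_of(kref, struct cached_info, cache_ref);
		kfree(info);
	}

	static int attach_cached_info(struct spu_context *ctx)
	{
		struct cached_info *info;

		info = kzalloc(sizeof(*info), GFP_KERNEL);
		if (!info)
			return -ENOMEM;

		kref_init(&info->cache_ref);
		/* the context drops this reference in destroy_spu_context() */
		spu_set_profile_private_kref(ctx, &info->cache_ref, destroy_cached_info);
		return 0;
	}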
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 5d9ad5a0307b..5e31799b1e3f 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file)
226 spu_acquire_saved(ctx_info->ctx); 226 spu_acquire_saved(ctx_info->ctx);
227 for (j = 0; j < spufs_coredump_num_notes; j++) 227 for (j = 0; j < spufs_coredump_num_notes; j++)
228 spufs_arch_write_note(ctx_info, j, file); 228 spufs_arch_write_note(ctx_info, j, file);
229 spu_release(ctx_info->ctx); 229 spu_release_saved(ctx_info->ctx);
230 list_del(&ctx_info->list); 230 list_del(&ctx_info->list);
231 kfree(ctx_info); 231 kfree(ctx_info);
232 } 232 }
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index f53a07437472..917eab4be486 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx)
179 if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) 179 if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
180 return 0; 180 return 0;
181 181
182 spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); 182 spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
183 183
184 pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, 184 pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
185 dsisr, ctx->state); 185 dsisr, ctx->state);
186 186
187 ctx->stats.hash_flt++; 187 ctx->stats.hash_flt++;
188 if (ctx->state == SPU_STATE_RUNNABLE) { 188 if (ctx->state == SPU_STATE_RUNNABLE)
189 ctx->spu->stats.hash_flt++; 189 ctx->spu->stats.hash_flt++;
190 spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT);
191 }
192 190
193 /* we must not hold the lock when entering spu_handle_mm_fault */ 191 /* we must not hold the lock when entering spu_handle_mm_fault */
194 spu_release(ctx); 192 spu_release(ctx);
@@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx)
226 } else 224 } else
227 spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); 225 spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
228 226
229 spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); 227 spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
230 return ret; 228 return ret;
231} 229}
232EXPORT_SYMBOL_GPL(spufs_handle_class1); 230EXPORT_SYMBOL_GPL(spufs_handle_class1);
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c2814ea96af2..7de4e919687b 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer,
370 370
371 spu_acquire_saved(ctx); 371 spu_acquire_saved(ctx);
372 ret = __spufs_regs_read(ctx, buffer, size, pos); 372 ret = __spufs_regs_read(ctx, buffer, size, pos);
373 spu_release(ctx); 373 spu_release_saved(ctx);
374 return ret; 374 return ret;
375} 375}
376 376
@@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer,
392 ret = copy_from_user(lscsa->gprs + *pos - size, 392 ret = copy_from_user(lscsa->gprs + *pos - size,
393 buffer, size) ? -EFAULT : size; 393 buffer, size) ? -EFAULT : size;
394 394
395 spu_release(ctx); 395 spu_release_saved(ctx);
396 return ret; 396 return ret;
397} 397}
398 398
@@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
421 421
422 spu_acquire_saved(ctx); 422 spu_acquire_saved(ctx);
423 ret = __spufs_fpcr_read(ctx, buffer, size, pos); 423 ret = __spufs_fpcr_read(ctx, buffer, size, pos);
424 spu_release(ctx); 424 spu_release_saved(ctx);
425 return ret; 425 return ret;
426} 426}
427 427
@@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer,
443 ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, 443 ret = copy_from_user((char *)&lscsa->fpcr + *pos - size,
444 buffer, size) ? -EFAULT : size; 444 buffer, size) ? -EFAULT : size;
445 445
446 spu_release(ctx); 446 spu_release_saved(ctx);
447 return ret; 447 return ret;
448} 448}
449 449
@@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
868 868
869 spu_acquire_saved(ctx); 869 spu_acquire_saved(ctx);
870 ret = __spufs_signal1_read(ctx, buf, len, pos); 870 ret = __spufs_signal1_read(ctx, buf, len, pos);
871 spu_release(ctx); 871 spu_release_saved(ctx);
872 872
873 return ret; 873 return ret;
874} 874}
@@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = {
934 .mmap = spufs_signal1_mmap, 934 .mmap = spufs_signal1_mmap,
935}; 935};
936 936
937static const struct file_operations spufs_signal1_nosched_fops = {
938 .open = spufs_signal1_open,
939 .release = spufs_signal1_release,
940 .write = spufs_signal1_write,
941 .mmap = spufs_signal1_mmap,
942};
943
937static int spufs_signal2_open(struct inode *inode, struct file *file) 944static int spufs_signal2_open(struct inode *inode, struct file *file)
938{ 945{
939 struct spufs_inode_info *i = SPUFS_I(inode); 946 struct spufs_inode_info *i = SPUFS_I(inode);
@@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
992 999
993 spu_acquire_saved(ctx); 1000 spu_acquire_saved(ctx);
994 ret = __spufs_signal2_read(ctx, buf, len, pos); 1001 ret = __spufs_signal2_read(ctx, buf, len, pos);
995 spu_release(ctx); 1002 spu_release_saved(ctx);
996 1003
997 return ret; 1004 return ret;
998} 1005}
@@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = {
1062 .mmap = spufs_signal2_mmap, 1069 .mmap = spufs_signal2_mmap,
1063}; 1070};
1064 1071
1072static const struct file_operations spufs_signal2_nosched_fops = {
1073 .open = spufs_signal2_open,
1074 .release = spufs_signal2_release,
1075 .write = spufs_signal2_write,
1076 .mmap = spufs_signal2_mmap,
1077};
1078
1065static void spufs_signal1_type_set(void *data, u64 val) 1079static void spufs_signal1_type_set(void *data, u64 val)
1066{ 1080{
1067 struct spu_context *ctx = data; 1081 struct spu_context *ctx = data;
@@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val)
1612 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1626 struct spu_lscsa *lscsa = ctx->csa.lscsa;
1613 spu_acquire_saved(ctx); 1627 spu_acquire_saved(ctx);
1614 lscsa->decr.slot[0] = (u32) val; 1628 lscsa->decr.slot[0] = (u32) val;
1615 spu_release(ctx); 1629 spu_release_saved(ctx);
1616} 1630}
1617 1631
1618static u64 __spufs_decr_get(void *data) 1632static u64 __spufs_decr_get(void *data)
@@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data)
1628 u64 ret; 1642 u64 ret;
1629 spu_acquire_saved(ctx); 1643 spu_acquire_saved(ctx);
1630 ret = __spufs_decr_get(data); 1644 ret = __spufs_decr_get(data);
1631 spu_release(ctx); 1645 spu_release_saved(ctx);
1632 return ret; 1646 return ret;
1633} 1647}
1634DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, 1648DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
@@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
1637static void spufs_decr_status_set(void *data, u64 val) 1651static void spufs_decr_status_set(void *data, u64 val)
1638{ 1652{
1639 struct spu_context *ctx = data; 1653 struct spu_context *ctx = data;
1640 struct spu_lscsa *lscsa = ctx->csa.lscsa;
1641 spu_acquire_saved(ctx); 1654 spu_acquire_saved(ctx);
1642 lscsa->decr_status.slot[0] = (u32) val; 1655 if (val)
1643 spu_release(ctx); 1656 ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
1657 else
1658 ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
1659 spu_release_saved(ctx);
1644} 1660}
1645 1661
1646static u64 __spufs_decr_status_get(void *data) 1662static u64 __spufs_decr_status_get(void *data)
1647{ 1663{
1648 struct spu_context *ctx = data; 1664 struct spu_context *ctx = data;
1649 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1665 if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
1650 return lscsa->decr_status.slot[0]; 1666 return SPU_DECR_STATUS_RUNNING;
1667 else
1668 return 0;
1651} 1669}
1652 1670
1653static u64 spufs_decr_status_get(void *data) 1671static u64 spufs_decr_status_get(void *data)
@@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data)
1656 u64 ret; 1674 u64 ret;
1657 spu_acquire_saved(ctx); 1675 spu_acquire_saved(ctx);
1658 ret = __spufs_decr_status_get(data); 1676 ret = __spufs_decr_status_get(data);
1659 spu_release(ctx); 1677 spu_release_saved(ctx);
1660 return ret; 1678 return ret;
1661} 1679}
1662DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, 1680DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
@@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val)
1668 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1686 struct spu_lscsa *lscsa = ctx->csa.lscsa;
1669 spu_acquire_saved(ctx); 1687 spu_acquire_saved(ctx);
1670 lscsa->event_mask.slot[0] = (u32) val; 1688 lscsa->event_mask.slot[0] = (u32) val;
1671 spu_release(ctx); 1689 spu_release_saved(ctx);
1672} 1690}
1673 1691
1674static u64 __spufs_event_mask_get(void *data) 1692static u64 __spufs_event_mask_get(void *data)
@@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data)
1684 u64 ret; 1702 u64 ret;
1685 spu_acquire_saved(ctx); 1703 spu_acquire_saved(ctx);
1686 ret = __spufs_event_mask_get(data); 1704 ret = __spufs_event_mask_get(data);
1687 spu_release(ctx); 1705 spu_release_saved(ctx);
1688 return ret; 1706 return ret;
1689} 1707}
1690DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, 1708DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
@@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data)
1708 1726
1709 spu_acquire_saved(ctx); 1727 spu_acquire_saved(ctx);
1710 ret = __spufs_event_status_get(data); 1728 ret = __spufs_event_status_get(data);
1711 spu_release(ctx); 1729 spu_release_saved(ctx);
1712 return ret; 1730 return ret;
1713} 1731}
1714DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, 1732DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
@@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val)
1720 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1738 struct spu_lscsa *lscsa = ctx->csa.lscsa;
1721 spu_acquire_saved(ctx); 1739 spu_acquire_saved(ctx);
1722 lscsa->srr0.slot[0] = (u32) val; 1740 lscsa->srr0.slot[0] = (u32) val;
1723 spu_release(ctx); 1741 spu_release_saved(ctx);
1724} 1742}
1725 1743
1726static u64 spufs_srr0_get(void *data) 1744static u64 spufs_srr0_get(void *data)
@@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data)
1730 u64 ret; 1748 u64 ret;
1731 spu_acquire_saved(ctx); 1749 spu_acquire_saved(ctx);
1732 ret = lscsa->srr0.slot[0]; 1750 ret = lscsa->srr0.slot[0];
1733 spu_release(ctx); 1751 spu_release_saved(ctx);
1734 return ret; 1752 return ret;
1735} 1753}
1736DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, 1754DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
@@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data)
1786 1804
1787 spu_acquire_saved(ctx); 1805 spu_acquire_saved(ctx);
1788 ret = __spufs_lslr_get(data); 1806 ret = __spufs_lslr_get(data);
1789 spu_release(ctx); 1807 spu_release_saved(ctx);
1790 1808
1791 return ret; 1809 return ret;
1792} 1810}
@@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
1850 spin_lock(&ctx->csa.register_lock); 1868 spin_lock(&ctx->csa.register_lock);
1851 ret = __spufs_mbox_info_read(ctx, buf, len, pos); 1869 ret = __spufs_mbox_info_read(ctx, buf, len, pos);
1852 spin_unlock(&ctx->csa.register_lock); 1870 spin_unlock(&ctx->csa.register_lock);
1853 spu_release(ctx); 1871 spu_release_saved(ctx);
1854 1872
1855 return ret; 1873 return ret;
1856} 1874}
@@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
1888 spin_lock(&ctx->csa.register_lock); 1906 spin_lock(&ctx->csa.register_lock);
1889 ret = __spufs_ibox_info_read(ctx, buf, len, pos); 1907 ret = __spufs_ibox_info_read(ctx, buf, len, pos);
1890 spin_unlock(&ctx->csa.register_lock); 1908 spin_unlock(&ctx->csa.register_lock);
1891 spu_release(ctx); 1909 spu_release_saved(ctx);
1892 1910
1893 return ret; 1911 return ret;
1894} 1912}
@@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
1929 spin_lock(&ctx->csa.register_lock); 1947 spin_lock(&ctx->csa.register_lock);
1930 ret = __spufs_wbox_info_read(ctx, buf, len, pos); 1948 ret = __spufs_wbox_info_read(ctx, buf, len, pos);
1931 spin_unlock(&ctx->csa.register_lock); 1949 spin_unlock(&ctx->csa.register_lock);
1932 spu_release(ctx); 1950 spu_release_saved(ctx);
1933 1951
1934 return ret; 1952 return ret;
1935} 1953}
@@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
1979 spin_lock(&ctx->csa.register_lock); 1997 spin_lock(&ctx->csa.register_lock);
1980 ret = __spufs_dma_info_read(ctx, buf, len, pos); 1998 ret = __spufs_dma_info_read(ctx, buf, len, pos);
1981 spin_unlock(&ctx->csa.register_lock); 1999 spin_unlock(&ctx->csa.register_lock);
1982 spu_release(ctx); 2000 spu_release_saved(ctx);
1983 2001
1984 return ret; 2002 return ret;
1985} 2003}
@@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
2030 spin_lock(&ctx->csa.register_lock); 2048 spin_lock(&ctx->csa.register_lock);
2031 ret = __spufs_proxydma_info_read(ctx, buf, len, pos); 2049 ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
2032 spin_unlock(&ctx->csa.register_lock); 2050 spin_unlock(&ctx->csa.register_lock);
2033 spu_release(ctx); 2051 spu_release_saved(ctx);
2034 2052
2035 return ret; 2053 return ret;
2036} 2054}
@@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = {
2065}; 2083};
2066 2084
2067static unsigned long long spufs_acct_time(struct spu_context *ctx, 2085static unsigned long long spufs_acct_time(struct spu_context *ctx,
2068 enum spuctx_execution_state state) 2086 enum spu_utilization_state state)
2069{ 2087{
2070 unsigned long time = ctx->stats.times[state]; 2088 struct timespec ts;
2089 unsigned long long time = ctx->stats.times[state];
2071 2090
2072 if (ctx->stats.execution_state == state) 2091 /*
2073 time += jiffies - ctx->stats.tstamp; 2092 * In general, utilization statistics are updated by the controlling
2093 * thread as the spu context moves through various well defined
2094 * state transitions, but if the context is lazily loaded its
2095 * utilization statistics are not updated as the controlling thread
2096 * is not tightly coupled with the execution of the spu context. We
2097 * calculate and apply the time delta from the last recorded state
2098 * of the spu context.
2099 */
2100 if (ctx->spu && ctx->stats.util_state == state) {
2101 ktime_get_ts(&ts);
2102 time += timespec_to_ns(&ts) - ctx->stats.tstamp;
2103 }
2074 2104
2075 return jiffies_to_msecs(time); 2105 return time / NSEC_PER_MSEC;
2076} 2106}
2077 2107
2078static unsigned long long spufs_slb_flts(struct spu_context *ctx) 2108static unsigned long long spufs_slb_flts(struct spu_context *ctx)
@@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private)
2107 spu_acquire(ctx); 2137 spu_acquire(ctx);
2108 seq_printf(s, "%s %llu %llu %llu %llu " 2138 seq_printf(s, "%s %llu %llu %llu %llu "
2109 "%llu %llu %llu %llu %llu %llu %llu %llu\n", 2139 "%llu %llu %llu %llu %llu %llu %llu %llu\n",
2110 ctx_state_names[ctx->stats.execution_state], 2140 ctx_state_names[ctx->stats.util_state],
2111 spufs_acct_time(ctx, SPUCTX_UTIL_USER), 2141 spufs_acct_time(ctx, SPU_UTIL_USER),
2112 spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), 2142 spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
2113 spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), 2143 spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
2114 spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), 2144 spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
2115 ctx->stats.vol_ctx_switch, 2145 ctx->stats.vol_ctx_switch,
2116 ctx->stats.invol_ctx_switch, 2146 ctx->stats.invol_ctx_switch,
2117 spufs_slb_flts(ctx), 2147 spufs_slb_flts(ctx),
@@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = {
2184 { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, 2214 { "mbox_stat", &spufs_mbox_stat_fops, 0444, },
2185 { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, 2215 { "ibox_stat", &spufs_ibox_stat_fops, 0444, },
2186 { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, 2216 { "wbox_stat", &spufs_wbox_stat_fops, 0444, },
2187 { "signal1", &spufs_signal1_fops, 0666, }, 2217 { "signal1", &spufs_signal1_nosched_fops, 0222, },
2188 { "signal2", &spufs_signal2_fops, 0666, }, 2218 { "signal2", &spufs_signal2_nosched_fops, 0222, },
2189 { "signal1_type", &spufs_signal1_type, 0666, }, 2219 { "signal1_type", &spufs_signal1_type, 0666, },
2190 { "signal2_type", &spufs_signal2_type, 0666, }, 2220 { "signal2_type", &spufs_signal2_type, 0666, },
2191 { "mss", &spufs_mss_fops, 0666, }, 2221 { "mss", &spufs_mss_fops, 0666, },
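
The spufs_acct_time() change above moves the per-state accounting from jiffies to a monotonic nanosecond clock and only folds in the still-running interval while the context actually holds an SPU. Below is a minimal userspace sketch of the same pattern, using clock_gettime(CLOCK_MONOTONIC) in place of ktime_get_ts(); the ctx_stats structure and helper names are made up for illustration and are not spufs symbols.

#include <stdint.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_MSEC	1000000ULL

enum util_state { UTIL_USER, UTIL_SYSTEM, UTIL_IOWAIT, UTIL_IDLE_LOADED, UTIL_MAX };

struct ctx_stats {			/* illustrative stand-in for ctx->stats */
	enum util_state util_state;	/* current state */
	uint64_t tstamp;		/* ns timestamp of the last state switch */
	uint64_t times[UTIL_MAX];	/* accumulated ns per state */
	int loaded;			/* stand-in for "ctx->spu != NULL" */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

/* Report the accumulated time spent in one state, in milliseconds. */
static uint64_t acct_time_ms(const struct ctx_stats *stats, enum util_state state)
{
	uint64_t time = stats->times[state];

	/* Only add the open interval while the context is loaded. */
	if (stats->loaded && stats->util_state == state)
		time += now_ns() - stats->tstamp;

	return time / NSEC_PER_MSEC;
}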
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
index 212ea78f9051..71a443253021 100644
--- a/arch/powerpc/platforms/cell/spufs/gang.c
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void)
35 35
36 kref_init(&gang->kref); 36 kref_init(&gang->kref);
37 mutex_init(&gang->mutex); 37 mutex_init(&gang->mutex);
38 mutex_init(&gang->aff_mutex);
38 INIT_LIST_HEAD(&gang->list); 39 INIT_LIST_HEAD(&gang->list);
40 INIT_LIST_HEAD(&gang->aff_list_head);
39 41
40out: 42out:
41 return gang; 43 return gang;
@@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
73{ 75{
74 mutex_lock(&gang->mutex); 76 mutex_lock(&gang->mutex);
75 WARN_ON(ctx->gang != gang); 77 WARN_ON(ctx->gang != gang);
78 if (!list_empty(&ctx->aff_list)) {
79 list_del_init(&ctx->aff_list);
80 gang->aff_flags &= ~AFF_OFFSETS_SET;
81 }
76 list_del_init(&ctx->gang_list); 82 list_del_init(&ctx->gang_list);
77 gang->contexts--; 83 gang->contexts--;
78 mutex_unlock(&gang->mutex); 84 mutex_unlock(&gang->mutex);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 7eb4d6cbcb74..b3d0dd118dd0 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -316,11 +316,107 @@ out:
316 return ret; 316 return ret;
317} 317}
318 318
319static int spufs_create_context(struct inode *inode, 319static struct spu_context *
320 struct dentry *dentry, 320spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
321 struct vfsmount *mnt, int flags, int mode) 321 struct file *filp)
322{
323 struct spu_context *tmp, *neighbor;
324 int count, node;
325 int aff_supp;
326
327 aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
328 struct spu, cbe_list))->aff_list);
329
330 if (!aff_supp)
331 return ERR_PTR(-EINVAL);
332
333 if (flags & SPU_CREATE_GANG)
334 return ERR_PTR(-EINVAL);
335
336 if (flags & SPU_CREATE_AFFINITY_MEM &&
337 gang->aff_ref_ctx &&
338 gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
339 return ERR_PTR(-EEXIST);
340
341 if (gang->aff_flags & AFF_MERGED)
342 return ERR_PTR(-EBUSY);
343
344 neighbor = NULL;
345 if (flags & SPU_CREATE_AFFINITY_SPU) {
346 if (!filp || filp->f_op != &spufs_context_fops)
347 return ERR_PTR(-EINVAL);
348
349 neighbor = get_spu_context(
350 SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
351
352 if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
353 !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
354 !list_entry(neighbor->aff_list.next, struct spu_context,
355 aff_list)->aff_head)
356 return ERR_PTR(-EEXIST);
357
358 if (gang != neighbor->gang)
359 return ERR_PTR(-EINVAL);
360
361 count = 1;
362 list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
363 count++;
364 if (list_empty(&neighbor->aff_list))
365 count++;
366
367 for (node = 0; node < MAX_NUMNODES; node++) {
368 if ((cbe_spu_info[node].n_spus - atomic_read(
369 &cbe_spu_info[node].reserved_spus)) >= count)
370 break;
371 }
372
373 if (node == MAX_NUMNODES)
374 return ERR_PTR(-EEXIST);
375 }
376
377 return neighbor;
378}
379
380static void
381spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
382 struct spu_context *neighbor)
383{
384 if (flags & SPU_CREATE_AFFINITY_MEM)
385 ctx->gang->aff_ref_ctx = ctx;
386
387 if (flags & SPU_CREATE_AFFINITY_SPU) {
388 if (list_empty(&neighbor->aff_list)) {
389 list_add_tail(&neighbor->aff_list,
390 &ctx->gang->aff_list_head);
391 neighbor->aff_head = 1;
392 }
393
394 if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
395 || list_entry(neighbor->aff_list.next, struct spu_context,
396 aff_list)->aff_head) {
397 list_add(&ctx->aff_list, &neighbor->aff_list);
398 } else {
399 list_add_tail(&ctx->aff_list, &neighbor->aff_list);
400 if (neighbor->aff_head) {
401 neighbor->aff_head = 0;
402 ctx->aff_head = 1;
403 }
404 }
405
406 if (!ctx->gang->aff_ref_ctx)
407 ctx->gang->aff_ref_ctx = ctx;
408 }
409}
410
411static int
412spufs_create_context(struct inode *inode, struct dentry *dentry,
413 struct vfsmount *mnt, int flags, int mode,
414 struct file *aff_filp)
322{ 415{
323 int ret; 416 int ret;
417 int affinity;
418 struct spu_gang *gang;
419 struct spu_context *neighbor;
324 420
325 ret = -EPERM; 421 ret = -EPERM;
326 if ((flags & SPU_CREATE_NOSCHED) && 422 if ((flags & SPU_CREATE_NOSCHED) &&
@@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode,
336 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) 432 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
337 goto out_unlock; 433 goto out_unlock;
338 434
435 gang = NULL;
436 neighbor = NULL;
437 affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
438 if (affinity) {
439 gang = SPUFS_I(inode)->i_gang;
440 ret = -EINVAL;
441 if (!gang)
442 goto out_unlock;
443 mutex_lock(&gang->aff_mutex);
444 neighbor = spufs_assert_affinity(flags, gang, aff_filp);
445 if (IS_ERR(neighbor)) {
446 ret = PTR_ERR(neighbor);
447 goto out_aff_unlock;
448 }
449 }
450
339 ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); 451 ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
340 if (ret) 452 if (ret)
341 goto out_unlock; 453 goto out_aff_unlock;
454
455 if (affinity)
456 spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
457 neighbor);
342 458
343 /* 459 /*
344 * get references for dget and mntget, will be released 460 * get references for dget and mntget, will be released
@@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode,
352 goto out; 468 goto out;
353 } 469 }
354 470
471out_aff_unlock:
472 if (affinity)
473 mutex_unlock(&gang->aff_mutex);
355out_unlock: 474out_unlock:
356 mutex_unlock(&inode->i_mutex); 475 mutex_unlock(&inode->i_mutex);
357out: 476out:
@@ -450,7 +569,8 @@ out:
450 569
451static struct file_system_type spufs_type; 570static struct file_system_type spufs_type;
452 571
453long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) 572long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
573 struct file *filp)
454{ 574{
455 struct dentry *dentry; 575 struct dentry *dentry;
456 int ret; 576 int ret;
@@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
487 dentry, nd->mnt, mode); 607 dentry, nd->mnt, mode);
488 else 608 else
489 return spufs_create_context(nd->dentry->d_inode, 609 return spufs_create_context(nd->dentry->d_inode,
490 dentry, nd->mnt, flags, mode); 610 dentry, nd->mnt, flags, mode, filp);
491 611
492out_dput: 612out_dput:
493 dput(dentry); 613 dput(dentry);
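
spufs_assert_affinity() above hands back either a neighbour context or an encoded errno, using the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() convention so the caller can branch to out_aff_unlock without a separate status out-parameter. A minimal sketch of that idiom; widget_lookup() and its caller are invented for illustration.

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct widget { int id; };		/* illustrative type */

static struct widget *widget_lookup(int id)
{
	struct widget *w;

	if (id < 0)
		return ERR_PTR(-EINVAL);	/* encode the errno in the pointer */

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w)
		return ERR_PTR(-ENOMEM);
	w->id = id;
	return w;
}

static int widget_use(int id)
{
	struct widget *w = widget_lookup(id);

	if (IS_ERR(w))
		return PTR_ERR(w);		/* decode and propagate the error */
	/* ... use w ... */
	kfree(w);
	return 0;
}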
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 58ae13b7de84..0b50fa5cb39d 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu)
18 wake_up_all(&ctx->stop_wq); 18 wake_up_all(&ctx->stop_wq);
19} 19}
20 20
21static inline int spu_stopped(struct spu_context *ctx, u32 * stat) 21static inline int spu_stopped(struct spu_context *ctx, u32 *stat)
22{ 22{
23 struct spu *spu; 23 struct spu *spu;
24 u64 pte_fault; 24 u64 pte_fault;
25 25
26 *stat = ctx->ops->status_read(ctx); 26 *stat = ctx->ops->status_read(ctx);
27 if (ctx->state != SPU_STATE_RUNNABLE) 27
28 return 1;
29 spu = ctx->spu; 28 spu = ctx->spu;
29 if (ctx->state != SPU_STATE_RUNNABLE ||
30 test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
31 return 1;
30 pte_fault = spu->dsisr & 32 pte_fault = spu->dsisr &
31 (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); 33 (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
32 return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? 34 return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ?
@@ -124,8 +126,10 @@ out:
124 return ret; 126 return ret;
125} 127}
126 128
127static int spu_run_init(struct spu_context *ctx, u32 * npc) 129static int spu_run_init(struct spu_context *ctx, u32 *npc)
128{ 130{
131 spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
132
129 if (ctx->flags & SPU_CREATE_ISOLATE) { 133 if (ctx->flags & SPU_CREATE_ISOLATE) {
130 unsigned long runcntl; 134 unsigned long runcntl;
131 135
@@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc)
151 ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); 155 ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
152 } 156 }
153 157
158 spuctx_switch_state(ctx, SPU_UTIL_USER);
159
154 return 0; 160 return 0;
155} 161}
156 162
157static int spu_run_fini(struct spu_context *ctx, u32 * npc, 163static int spu_run_fini(struct spu_context *ctx, u32 *npc,
158 u32 * status) 164 u32 *status)
159{ 165{
160 int ret = 0; 166 int ret = 0;
161 167
162 *status = ctx->ops->status_read(ctx); 168 *status = ctx->ops->status_read(ctx);
163 *npc = ctx->ops->npc_read(ctx); 169 *npc = ctx->ops->npc_read(ctx);
170
171 spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
164 spu_release(ctx); 172 spu_release(ctx);
165 173
166 if (signal_pending(current)) 174 if (signal_pending(current))
@@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx)
289 return ret; 297 return ret;
290} 298}
291 299
292long spufs_run_spu(struct file *file, struct spu_context *ctx, 300long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
293 u32 *npc, u32 *event)
294{ 301{
295 int ret; 302 int ret;
303 struct spu *spu;
296 u32 status; 304 u32 status;
297 305
298 if (mutex_lock_interruptible(&ctx->run_mutex)) 306 if (mutex_lock_interruptible(&ctx->run_mutex))
@@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
328 ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); 336 ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
329 if (unlikely(ret)) 337 if (unlikely(ret))
330 break; 338 break;
339 spu = ctx->spu;
340 if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
341 &ctx->sched_flags))) {
342 if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
343 spu_switch_notify(spu, ctx);
344 continue;
345 }
346 }
347
348 spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
349
331 if ((status & SPU_STATUS_STOPPED_BY_STOP) && 350 if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
332 (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { 351 (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
333 ret = spu_process_callback(ctx); 352 ret = spu_process_callback(ctx);
@@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
356 (ctx->state == SPU_STATE_RUNNABLE)) 375 (ctx->state == SPU_STATE_RUNNABLE))
357 ctx->stats.libassist++; 376 ctx->stats.libassist++;
358 377
378
359 ctx->ops->master_stop(ctx); 379 ctx->ops->master_stop(ctx);
360 ret = spu_run_fini(ctx, npc, &status); 380 ret = spu_run_fini(ctx, npc, &status);
361 spu_yield(ctx); 381 spu_yield(ctx);
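
The run-loop changes above rely on a simple flag handshake: the scheduler side sets SPU_SCHED_NOTIFY_ACTIVE and wakes ctx->stop_wq, and the controlling thread consumes the bit with test_and_clear_bit() when spufs_wait() returns, issuing the switch notification itself. A stripped-down kernel-style sketch of that handshake; the demo_* names are illustrative, not spufs symbols.

#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/types.h>

enum { DEMO_NOTIFY_ACTIVE };		/* illustrative flag bit */

struct demo_ctx {			/* illustrative, not struct spu_context */
	unsigned long sched_flags;
	wait_queue_head_t stop_wq;
};

/* Producer: mark the context and kick whoever sleeps on its wait queue. */
static void demo_mark_active(struct demo_ctx *ctx)
{
	set_bit(DEMO_NOTIFY_ACTIVE, &ctx->sched_flags);
	mb();				/* flag must be visible before the wakeup */
	wake_up_all(&ctx->stop_wq);
}

/* Consumer: claim the flag exactly once after waking up. */
static bool demo_consume_notify(struct demo_ctx *ctx)
{
	return test_and_clear_bit(DEMO_NOTIFY_ACTIVE, &ctx->sched_flags);
}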
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index e5b4dd1db286..227968b4779d 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -51,9 +51,6 @@ struct spu_prio_array {
51 DECLARE_BITMAP(bitmap, MAX_PRIO); 51 DECLARE_BITMAP(bitmap, MAX_PRIO);
52 struct list_head runq[MAX_PRIO]; 52 struct list_head runq[MAX_PRIO];
53 spinlock_t runq_lock; 53 spinlock_t runq_lock;
54 struct list_head active_list[MAX_NUMNODES];
55 struct mutex active_mutex[MAX_NUMNODES];
56 int nr_active[MAX_NUMNODES];
57 int nr_waiting; 54 int nr_waiting;
58}; 55};
59 56
@@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
127 ctx->policy = current->policy; 124 ctx->policy = current->policy;
128 125
129 /* 126 /*
130 * A lot of places that don't hold active_mutex poke into 127 * A lot of places that don't hold list_mutex poke into
131 * cpus_allowed, including grab_runnable_context which 128 * cpus_allowed, including grab_runnable_context which
132 * already holds the runq_lock. So abuse runq_lock 129 * already holds the runq_lock. So abuse runq_lock
133 * to protect this field aswell. 130 * to protect this field aswell.
@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx)
141{ 138{
142 int node = ctx->spu->node; 139 int node = ctx->spu->node;
143 140
144 mutex_lock(&spu_prio->active_mutex[node]); 141 mutex_lock(&cbe_spu_info[node].list_mutex);
145 __spu_update_sched_info(ctx); 142 __spu_update_sched_info(ctx);
146 mutex_unlock(&spu_prio->active_mutex[node]); 143 mutex_unlock(&cbe_spu_info[node].list_mutex);
147} 144}
148 145
149static int __node_allowed(struct spu_context *ctx, int node) 146static int __node_allowed(struct spu_context *ctx, int node)
@@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node)
169 return rval; 166 return rval;
170} 167}
171 168
172/** 169static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
173 * spu_add_to_active_list - add spu to active list
174 * @spu: spu to add to the active list
175 */
176static void spu_add_to_active_list(struct spu *spu)
177{
178 int node = spu->node;
179
180 mutex_lock(&spu_prio->active_mutex[node]);
181 spu_prio->nr_active[node]++;
182 list_add_tail(&spu->list, &spu_prio->active_list[node]);
183 mutex_unlock(&spu_prio->active_mutex[node]);
184}
185 170
186static void __spu_remove_from_active_list(struct spu *spu) 171void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
187{ 172{
188 list_del_init(&spu->list); 173 blocking_notifier_call_chain(&spu_switch_notifier,
189 spu_prio->nr_active[spu->node]--; 174 ctx ? ctx->object_id : 0, spu);
190} 175}
191 176
192/** 177static void notify_spus_active(void)
193 * spu_remove_from_active_list - remove spu from active list
194 * @spu: spu to remove from the active list
195 */
196static void spu_remove_from_active_list(struct spu *spu)
197{ 178{
198 int node = spu->node; 179 int node;
199
200 mutex_lock(&spu_prio->active_mutex[node]);
201 __spu_remove_from_active_list(spu);
202 mutex_unlock(&spu_prio->active_mutex[node]);
203}
204 180
205static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); 181 /*
182 * Wake up the active spu_contexts.
183 *
184 * When the awakened processes see their "notify_active" flag is set,
185 * they will call spu_switch_notify();
186 */
187 for_each_online_node(node) {
188 struct spu *spu;
206 189
207static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) 190 mutex_lock(&cbe_spu_info[node].list_mutex);
208{ 191 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
209 blocking_notifier_call_chain(&spu_switch_notifier, 192 if (spu->alloc_state != SPU_FREE) {
210 ctx ? ctx->object_id : 0, spu); 193 struct spu_context *ctx = spu->ctx;
194 set_bit(SPU_SCHED_NOTIFY_ACTIVE,
195 &ctx->sched_flags);
196 mb();
197 wake_up_all(&ctx->stop_wq);
198 }
199 }
200 mutex_unlock(&cbe_spu_info[node].list_mutex);
201 }
211} 202}
212 203
213int spu_switch_event_register(struct notifier_block * n) 204int spu_switch_event_register(struct notifier_block * n)
214{ 205{
215 return blocking_notifier_chain_register(&spu_switch_notifier, n); 206 int ret;
207 ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
208 if (!ret)
209 notify_spus_active();
210 return ret;
216} 211}
212EXPORT_SYMBOL_GPL(spu_switch_event_register);
217 213
218int spu_switch_event_unregister(struct notifier_block * n) 214int spu_switch_event_unregister(struct notifier_block * n)
219{ 215{
220 return blocking_notifier_chain_unregister(&spu_switch_notifier, n); 216 return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
221} 217}
218EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
222 219
223/** 220/**
224 * spu_bind_context - bind spu context to physical spu 221 * spu_bind_context - bind spu context to physical spu
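
The hunk above turns the SPU switch notification into an exported notifier interface and, on registration, immediately flags every loaded context so a late-attaching profiler still learns about SPU tasks that are already running. Below is a consumer-side sketch of how OProfile-style code might hook it; the demo_* names are invented, and the callback body only logs what it is handed.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <asm/spu.h>

static int demo_spu_switch(struct notifier_block *nb,
			   unsigned long object_id, void *data)
{
	/* object_id is ctx->object_id (0 on unbind), data is the struct spu. */
	pr_debug("spu switch: object_id=%lu spu=%p\n", object_id, data);
	return NOTIFY_OK;
}

static struct notifier_block demo_spu_switch_nb = {
	.notifier_call = demo_spu_switch,
};

static int __init demo_init(void)
{
	/*
	 * Registration also triggers notify_spus_active(), so contexts that
	 * are already bound get reported without waiting for their next
	 * context switch.
	 */
	return spu_switch_event_register(&demo_spu_switch_nb);
}

static void __exit demo_exit(void)
{
	spu_switch_event_unregister(&demo_spu_switch_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");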
@@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
229{ 226{
230 pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, 227 pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
231 spu->number, spu->node); 228 spu->number, spu->node);
229 spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
230
231 if (ctx->flags & SPU_CREATE_NOSCHED)
232 atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
233 if (!list_empty(&ctx->aff_list))
234 atomic_inc(&ctx->gang->aff_sched_count);
232 235
233 ctx->stats.slb_flt_base = spu->stats.slb_flt; 236 ctx->stats.slb_flt_base = spu->stats.slb_flt;
234 ctx->stats.class2_intr_base = spu->stats.class2_intr; 237 ctx->stats.class2_intr_base = spu->stats.class2_intr;
@@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
238 ctx->spu = spu; 241 ctx->spu = spu;
239 ctx->ops = &spu_hw_ops; 242 ctx->ops = &spu_hw_ops;
240 spu->pid = current->pid; 243 spu->pid = current->pid;
244 spu->tgid = current->tgid;
241 spu_associate_mm(spu, ctx->owner); 245 spu_associate_mm(spu, ctx->owner);
242 spu->ibox_callback = spufs_ibox_callback; 246 spu->ibox_callback = spufs_ibox_callback;
243 spu->wbox_callback = spufs_wbox_callback; 247 spu->wbox_callback = spufs_wbox_callback;
@@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
251 spu_cpu_affinity_set(spu, raw_smp_processor_id()); 255 spu_cpu_affinity_set(spu, raw_smp_processor_id());
252 spu_switch_notify(spu, ctx); 256 spu_switch_notify(spu, ctx);
253 ctx->state = SPU_STATE_RUNNABLE; 257 ctx->state = SPU_STATE_RUNNABLE;
254 spu_switch_state(spu, SPU_UTIL_SYSTEM); 258
259 spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
260}
261
262/*
263 * Must be used with the list_mutex held.
264 */
265static inline int sched_spu(struct spu *spu)
266{
267 BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
268
269 return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
270}
271
272static void aff_merge_remaining_ctxs(struct spu_gang *gang)
273{
274 struct spu_context *ctx;
275
276 list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
277 if (list_empty(&ctx->aff_list))
278 list_add(&ctx->aff_list, &gang->aff_list_head);
279 }
280 gang->aff_flags |= AFF_MERGED;
281}
282
283static void aff_set_offsets(struct spu_gang *gang)
284{
285 struct spu_context *ctx;
286 int offset;
287
288 offset = -1;
289 list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
290 aff_list) {
291 if (&ctx->aff_list == &gang->aff_list_head)
292 break;
293 ctx->aff_offset = offset--;
294 }
295
296 offset = 0;
297 list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
298 if (&ctx->aff_list == &gang->aff_list_head)
299 break;
300 ctx->aff_offset = offset++;
301 }
302
303 gang->aff_flags |= AFF_OFFSETS_SET;
304}
305
306static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
307 int group_size, int lowest_offset)
308{
309 struct spu *spu;
310 int node, n;
311
312 /*
313 * TODO: A better algorithm could be used to find a good spu to be
314 * used as reference location for the ctxs chain.
315 */
316 node = cpu_to_node(raw_smp_processor_id());
317 for (n = 0; n < MAX_NUMNODES; n++, node++) {
318 node = (node < MAX_NUMNODES) ? node : 0;
319 if (!node_allowed(ctx, node))
320 continue;
321 mutex_lock(&cbe_spu_info[node].list_mutex);
322 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
323 if ((!mem_aff || spu->has_mem_affinity) &&
324 sched_spu(spu)) {
325 mutex_unlock(&cbe_spu_info[node].list_mutex);
326 return spu;
327 }
328 }
329 mutex_unlock(&cbe_spu_info[node].list_mutex);
330 }
331 return NULL;
332}
333
334static void aff_set_ref_point_location(struct spu_gang *gang)
335{
336 int mem_aff, gs, lowest_offset;
337 struct spu_context *ctx;
338 struct spu *tmp;
339
340 mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
341 lowest_offset = 0;
342 gs = 0;
343
344 list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
345 gs++;
346
347 list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
348 aff_list) {
349 if (&ctx->aff_list == &gang->aff_list_head)
350 break;
351 lowest_offset = ctx->aff_offset;
352 }
353
354 gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
355}
356
357static struct spu *ctx_location(struct spu *ref, int offset, int node)
358{
359 struct spu *spu;
360
361 spu = NULL;
362 if (offset >= 0) {
363 list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
364 BUG_ON(spu->node != node);
365 if (offset == 0)
366 break;
367 if (sched_spu(spu))
368 offset--;
369 }
370 } else {
371 list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
372 BUG_ON(spu->node != node);
373 if (offset == 0)
374 break;
375 if (sched_spu(spu))
376 offset++;
377 }
378 }
379
380 return spu;
381}
382
383/*
384 * affinity_check is called each time a context is going to be scheduled.
385 * It returns the spu ptr on which the context must run.
386 */
387static int has_affinity(struct spu_context *ctx)
388{
389 struct spu_gang *gang = ctx->gang;
390
391 if (list_empty(&ctx->aff_list))
392 return 0;
393
394 mutex_lock(&gang->aff_mutex);
395 if (!gang->aff_ref_spu) {
396 if (!(gang->aff_flags & AFF_MERGED))
397 aff_merge_remaining_ctxs(gang);
398 if (!(gang->aff_flags & AFF_OFFSETS_SET))
399 aff_set_offsets(gang);
400 aff_set_ref_point_location(gang);
401 }
402 mutex_unlock(&gang->aff_mutex);
403
404 return gang->aff_ref_spu != NULL;
255} 405}
256 406
257/** 407/**
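
The new placement helpers above number gang members by a signed offset from a reference context (negative before it, positive after it) and later walk the reference SPU's affinity chain by that many schedulable SPUs. The toy program below models only that counting rule; it is an analogy, not the spufs algorithm, and the ineligible slots stand in for SPUs reserved by NOSCHED contexts.

#include <stdio.h>

/* Land 'offset' eligible slots away from 'ref'; skip ineligible ones. */
static int place(const int *eligible, int nslots, int ref, int offset)
{
	int i = ref;
	int step = offset >= 0 ? 1 : -1;

	while (offset != 0) {
		i += step;
		if (i < 0 || i >= nslots)
			return -1;		/* ran out of room in this direction */
		if (eligible[i])
			offset -= step;		/* count only usable slots */
	}
	return i;
}

int main(void)
{
	int eligible[8] = { 1, 1, 0, 1, 1, 1, 1, 1 };	/* slot 2 is reserved */
	int ref = 4;

	printf("offset -1 -> slot %d\n", place(eligible, 8, ref, -1));
	printf("offset +2 -> slot %d\n", place(eligible, 8, ref, 2));
	return 0;
}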
@@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
263{ 413{
264 pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, 414 pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
265 spu->pid, spu->number, spu->node); 415 spu->pid, spu->number, spu->node);
416 spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
266 417
267 spu_switch_state(spu, SPU_UTIL_IDLE); 418 if (spu->ctx->flags & SPU_CREATE_NOSCHED)
268 419 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
420 if (!list_empty(&ctx->aff_list))
421 if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
422 ctx->gang->aff_ref_spu = NULL;
269 spu_switch_notify(spu, NULL); 423 spu_switch_notify(spu, NULL);
270 spu_unmap_mappings(ctx); 424 spu_unmap_mappings(ctx);
271 spu_save(&ctx->csa, spu); 425 spu_save(&ctx->csa, spu);
@@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
278 spu->dma_callback = NULL; 432 spu->dma_callback = NULL;
279 spu_associate_mm(spu, NULL); 433 spu_associate_mm(spu, NULL);
280 spu->pid = 0; 434 spu->pid = 0;
435 spu->tgid = 0;
281 ctx->ops = &spu_backing_ops; 436 ctx->ops = &spu_backing_ops;
282 ctx->spu = NULL;
283 spu->flags = 0; 437 spu->flags = 0;
284 spu->ctx = NULL; 438 spu->ctx = NULL;
285 439
@@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
287 (spu->stats.slb_flt - ctx->stats.slb_flt_base); 441 (spu->stats.slb_flt - ctx->stats.slb_flt_base);
288 ctx->stats.class2_intr += 442 ctx->stats.class2_intr +=
289 (spu->stats.class2_intr - ctx->stats.class2_intr_base); 443 (spu->stats.class2_intr - ctx->stats.class2_intr_base);
444
445 /* This maps the underlying spu state to idle */
446 spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
447 ctx->spu = NULL;
290} 448}
291 449
292/** 450/**
@@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx)
352 510
353static struct spu *spu_get_idle(struct spu_context *ctx) 511static struct spu *spu_get_idle(struct spu_context *ctx)
354{ 512{
355 struct spu *spu = NULL; 513 struct spu *spu;
356 int node = cpu_to_node(raw_smp_processor_id()); 514 int node, n;
357 int n; 515
516 if (has_affinity(ctx)) {
517 node = ctx->gang->aff_ref_spu->node;
358 518
519 mutex_lock(&cbe_spu_info[node].list_mutex);
520 spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
521 if (spu && spu->alloc_state == SPU_FREE)
522 goto found;
523 mutex_unlock(&cbe_spu_info[node].list_mutex);
524 return NULL;
525 }
526
527 node = cpu_to_node(raw_smp_processor_id());
359 for (n = 0; n < MAX_NUMNODES; n++, node++) { 528 for (n = 0; n < MAX_NUMNODES; n++, node++) {
360 node = (node < MAX_NUMNODES) ? node : 0; 529 node = (node < MAX_NUMNODES) ? node : 0;
361 if (!node_allowed(ctx, node)) 530 if (!node_allowed(ctx, node))
362 continue; 531 continue;
363 spu = spu_alloc_node(node); 532
364 if (spu) 533 mutex_lock(&cbe_spu_info[node].list_mutex);
365 break; 534 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
535 if (spu->alloc_state == SPU_FREE)
536 goto found;
537 }
538 mutex_unlock(&cbe_spu_info[node].list_mutex);
366 } 539 }
540
541 return NULL;
542
543 found:
544 spu->alloc_state = SPU_USED;
545 mutex_unlock(&cbe_spu_info[node].list_mutex);
546 pr_debug("Got SPU %d %d\n", spu->number, spu->node);
547 spu_init_channels(spu);
367 return spu; 548 return spu;
368} 549}
369 550
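
spu_get_idle() now marks the chosen SPU as SPU_USED before dropping the per-node list_mutex (the found: label), so two contexts scanning concurrently cannot claim the same SPU. A compact sketch of that search-and-claim-under-one-lock shape; demo_pool and demo_unit are illustrative stand-ins, not the spufs structures.

#include <linux/list.h>
#include <linux/mutex.h>

enum { DEMO_FREE, DEMO_USED };

struct demo_unit {			/* illustrative, not struct spu */
	struct list_head node;
	int alloc_state;
};

struct demo_pool {			/* illustrative, not cbe_spu_info[] */
	struct mutex lock;
	struct list_head units;
};

static struct demo_unit *demo_claim(struct demo_pool *pool)
{
	struct demo_unit *u;

	mutex_lock(&pool->lock);
	list_for_each_entry(u, &pool->units, node) {
		if (u->alloc_state == DEMO_FREE) {
			u->alloc_state = DEMO_USED;	/* claim before unlocking */
			mutex_unlock(&pool->lock);
			return u;
		}
	}
	mutex_unlock(&pool->lock);
	return NULL;
}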
@@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx)
393 if (!node_allowed(ctx, node)) 574 if (!node_allowed(ctx, node))
394 continue; 575 continue;
395 576
396 mutex_lock(&spu_prio->active_mutex[node]); 577 mutex_lock(&cbe_spu_info[node].list_mutex);
397 list_for_each_entry(spu, &spu_prio->active_list[node], list) { 578 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
398 struct spu_context *tmp = spu->ctx; 579 struct spu_context *tmp = spu->ctx;
399 580
400 if (tmp->prio > ctx->prio && 581 if (tmp->prio > ctx->prio &&
401 (!victim || tmp->prio > victim->prio)) 582 (!victim || tmp->prio > victim->prio))
402 victim = spu->ctx; 583 victim = spu->ctx;
403 } 584 }
404 mutex_unlock(&spu_prio->active_mutex[node]); 585 mutex_unlock(&cbe_spu_info[node].list_mutex);
405 586
406 if (victim) { 587 if (victim) {
407 /* 588 /*
@@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx)
426 victim = NULL; 607 victim = NULL;
427 goto restart; 608 goto restart;
428 } 609 }
429 spu_remove_from_active_list(spu); 610
611 mutex_lock(&cbe_spu_info[node].list_mutex);
612 cbe_spu_info[node].nr_active--;
613 mutex_unlock(&cbe_spu_info[node].list_mutex);
614
430 spu_unbind_context(spu, victim); 615 spu_unbind_context(spu, victim);
431 victim->stats.invol_ctx_switch++; 616 victim->stats.invol_ctx_switch++;
432 spu->stats.invol_ctx_switch++; 617 spu->stats.invol_ctx_switch++;
@@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx)
455 */ 640 */
456int spu_activate(struct spu_context *ctx, unsigned long flags) 641int spu_activate(struct spu_context *ctx, unsigned long flags)
457{ 642{
458 spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
459
460 do { 643 do {
461 struct spu *spu; 644 struct spu *spu;
462 645
@@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags)
477 if (!spu && rt_prio(ctx->prio)) 660 if (!spu && rt_prio(ctx->prio))
478 spu = find_victim(ctx); 661 spu = find_victim(ctx);
479 if (spu) { 662 if (spu) {
663 int node = spu->node;
664
665 mutex_lock(&cbe_spu_info[node].list_mutex);
480 spu_bind_context(spu, ctx); 666 spu_bind_context(spu, ctx);
481 spu_add_to_active_list(spu); 667 cbe_spu_info[node].nr_active++;
668 mutex_unlock(&cbe_spu_info[node].list_mutex);
482 return 0; 669 return 0;
483 } 670 }
484 671
@@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node)
500 int best; 687 int best;
501 688
502 spin_lock(&spu_prio->runq_lock); 689 spin_lock(&spu_prio->runq_lock);
503 best = sched_find_first_bit(spu_prio->bitmap); 690 best = find_first_bit(spu_prio->bitmap, prio);
504 while (best < prio) { 691 while (best < prio) {
505 struct list_head *rq = &spu_prio->runq[best]; 692 struct list_head *rq = &spu_prio->runq[best];
506 693
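
grab_runnable_context() above switches from sched_find_first_bit() to find_first_bit(bitmap, prio), which is why spu_sched_init() no longer needs the sentinel bit at MAX_PRIO: find_first_bit() simply returns its size argument when nothing below prio is set. A kernel-style sketch of the same bitmap-indexed runqueue; the demo_* types and the DEMO_MAX_PRIO value are illustrative.

#include <linux/bitmap.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#define DEMO_MAX_PRIO 140		/* illustrative, mirrors MAX_PRIO */

struct demo_task {
	struct list_head rq;
	int prio;
};

struct demo_runq {
	DECLARE_BITMAP(bitmap, DEMO_MAX_PRIO);	/* one bit per non-empty list */
	struct list_head runq[DEMO_MAX_PRIO];
	spinlock_t lock;
};

/* Pop the best (numerically lowest) waiter with priority below 'prio'. */
static struct demo_task *demo_grab(struct demo_runq *q, int prio)
{
	struct demo_task *t = NULL;
	int best;

	spin_lock(&q->lock);
	best = find_first_bit(q->bitmap, prio);	/* == prio if nothing is set */
	if (best < prio) {
		t = list_entry(q->runq[best].next, struct demo_task, rq);
		list_del_init(&t->rq);
		if (list_empty(&q->runq[best]))
			__clear_bit(best, q->bitmap);
	}
	spin_unlock(&q->lock);
	return t;
}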
@@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
527 if (spu) { 714 if (spu) {
528 new = grab_runnable_context(max_prio, spu->node); 715 new = grab_runnable_context(max_prio, spu->node);
529 if (new || force) { 716 if (new || force) {
530 spu_remove_from_active_list(spu); 717 int node = spu->node;
718
719 mutex_lock(&cbe_spu_info[node].list_mutex);
531 spu_unbind_context(spu, ctx); 720 spu_unbind_context(spu, ctx);
721 spu->alloc_state = SPU_FREE;
722 cbe_spu_info[node].nr_active--;
723 mutex_unlock(&cbe_spu_info[node].list_mutex);
724
532 ctx->stats.vol_ctx_switch++; 725 ctx->stats.vol_ctx_switch++;
533 spu->stats.vol_ctx_switch++; 726 spu->stats.vol_ctx_switch++;
534 spu_free(spu); 727
535 if (new) 728 if (new)
536 wake_up(&new->stop_wq); 729 wake_up(&new->stop_wq);
537 } 730 }
@@ -550,21 +743,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
550 */ 743 */
551void spu_deactivate(struct spu_context *ctx) 744void spu_deactivate(struct spu_context *ctx)
552{ 745{
553 /*
554 * We must never reach this for a nosched context,
555 * but handle the case gracefull instead of panicing.
556 */
557 if (ctx->flags & SPU_CREATE_NOSCHED) {
558 WARN_ON(1);
559 return;
560 }
561
562 __spu_deactivate(ctx, 1, MAX_PRIO); 746 __spu_deactivate(ctx, 1, MAX_PRIO);
563 spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
564} 747}
565 748
566/** 749/**
567 * spu_yield - yield a physical spu if others are waiting 750 * spu_yield - yield a physical spu if others are waiting
568 * @ctx: spu context to yield 751 * @ctx: spu context to yield
569 * 752 *
570 * Check if there is a higher priority context waiting and if yes 753 * Check if there is a higher priority context waiting and if yes
@@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx)
575{ 758{
576 if (!(ctx->flags & SPU_CREATE_NOSCHED)) { 759 if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
577 mutex_lock(&ctx->state_mutex); 760 mutex_lock(&ctx->state_mutex);
578 if (__spu_deactivate(ctx, 0, MAX_PRIO)) 761 __spu_deactivate(ctx, 0, MAX_PRIO);
579 spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
580 else {
581 spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
582 spu_switch_state(ctx->spu, SPU_UTIL_USER);
583 }
584 mutex_unlock(&ctx->state_mutex); 762 mutex_unlock(&ctx->state_mutex);
585 } 763 }
586} 764}
587 765
588static void spusched_tick(struct spu_context *ctx) 766static noinline void spusched_tick(struct spu_context *ctx)
589{ 767{
590 if (ctx->flags & SPU_CREATE_NOSCHED) 768 if (ctx->flags & SPU_CREATE_NOSCHED)
591 return; 769 return;
@@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx)
596 return; 774 return;
597 775
598 /* 776 /*
599 * Unfortunately active_mutex ranks outside of state_mutex, so 777 * Unfortunately list_mutex ranks outside of state_mutex, so
600 * we have to trylock here. If we fail give the context another 778 * we have to trylock here. If we fail give the context another
601 * tick and try again. 779 * tick and try again.
602 */ 780 */
@@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx)
606 784
607 new = grab_runnable_context(ctx->prio + 1, spu->node); 785 new = grab_runnable_context(ctx->prio + 1, spu->node);
608 if (new) { 786 if (new) {
609
610 __spu_remove_from_active_list(spu);
611 spu_unbind_context(spu, ctx); 787 spu_unbind_context(spu, ctx);
612 ctx->stats.invol_ctx_switch++; 788 ctx->stats.invol_ctx_switch++;
613 spu->stats.invol_ctx_switch++; 789 spu->stats.invol_ctx_switch++;
614 spu_free(spu); 790 spu->alloc_state = SPU_FREE;
791 cbe_spu_info[spu->node].nr_active--;
615 wake_up(&new->stop_wq); 792 wake_up(&new->stop_wq);
616 /* 793 /*
617 * We need to break out of the wait loop in 794 * We need to break out of the wait loop in
@@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx)
632 * 809 *
633 * Return the number of tasks currently running or waiting to run. 810 * Return the number of tasks currently running or waiting to run.
634 * 811 *
635 * Note that we don't take runq_lock / active_mutex here. Reading 812 * Note that we don't take runq_lock / list_mutex here. Reading
636 * a single 32bit value is atomic on powerpc, and we don't care 813 * a single 32bit value is atomic on powerpc, and we don't care
637 * about memory ordering issues here. 814 * about memory ordering issues here.
638 */ 815 */
@@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void)
641 int nr_active = 0, node; 818 int nr_active = 0, node;
642 819
643 for (node = 0; node < MAX_NUMNODES; node++) 820 for (node = 0; node < MAX_NUMNODES; node++)
644 nr_active += spu_prio->nr_active[node]; 821 nr_active += cbe_spu_info[node].nr_active;
645 nr_active += spu_prio->nr_waiting; 822 nr_active += spu_prio->nr_waiting;
646 823
647 return nr_active; 824 return nr_active;
@@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data)
681 858
682static int spusched_thread(void *unused) 859static int spusched_thread(void *unused)
683{ 860{
684 struct spu *spu, *next; 861 struct spu *spu;
685 int node; 862 int node;
686 863
687 while (!kthread_should_stop()) { 864 while (!kthread_should_stop()) {
688 set_current_state(TASK_INTERRUPTIBLE); 865 set_current_state(TASK_INTERRUPTIBLE);
689 schedule(); 866 schedule();
690 for (node = 0; node < MAX_NUMNODES; node++) { 867 for (node = 0; node < MAX_NUMNODES; node++) {
691 mutex_lock(&spu_prio->active_mutex[node]); 868 mutex_lock(&cbe_spu_info[node].list_mutex);
692 list_for_each_entry_safe(spu, next, 869 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
693 &spu_prio->active_list[node], 870 if (spu->ctx)
694 list) 871 spusched_tick(spu->ctx);
695 spusched_tick(spu->ctx); 872 mutex_unlock(&cbe_spu_info[node].list_mutex);
696 mutex_unlock(&spu_prio->active_mutex[node]);
697 } 873 }
698 } 874 }
699 875
@@ -751,10 +927,9 @@ int __init spu_sched_init(void)
751 INIT_LIST_HEAD(&spu_prio->runq[i]); 927 INIT_LIST_HEAD(&spu_prio->runq[i]);
752 __clear_bit(i, spu_prio->bitmap); 928 __clear_bit(i, spu_prio->bitmap);
753 } 929 }
754 __set_bit(MAX_PRIO, spu_prio->bitmap);
755 for (i = 0; i < MAX_NUMNODES; i++) { 930 for (i = 0; i < MAX_NUMNODES; i++) {
756 mutex_init(&spu_prio->active_mutex[i]); 931 mutex_init(&cbe_spu_info[i].list_mutex);
757 INIT_LIST_HEAD(&spu_prio->active_list[i]); 932 INIT_LIST_HEAD(&cbe_spu_info[i].spus);
758 } 933 }
759 spin_lock_init(&spu_prio->runq_lock); 934 spin_lock_init(&spu_prio->runq_lock);
760 935
@@ -783,9 +958,9 @@ int __init spu_sched_init(void)
783 return err; 958 return err;
784} 959}
785 960
786void __exit spu_sched_exit(void) 961void spu_sched_exit(void)
787{ 962{
788 struct spu *spu, *tmp; 963 struct spu *spu;
789 int node; 964 int node;
790 965
791 remove_proc_entry("spu_loadavg", NULL); 966 remove_proc_entry("spu_loadavg", NULL);
@@ -794,13 +969,11 @@ void __exit spu_sched_exit(void)
794 kthread_stop(spusched_task); 969 kthread_stop(spusched_task);
795 970
796 for (node = 0; node < MAX_NUMNODES; node++) { 971 for (node = 0; node < MAX_NUMNODES; node++) {
797 mutex_lock(&spu_prio->active_mutex[node]); 972 mutex_lock(&cbe_spu_info[node].list_mutex);
798 list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], 973 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
799 list) { 974 if (spu->alloc_state != SPU_FREE)
800 list_del_init(&spu->list); 975 spu->alloc_state = SPU_FREE;
801 spu_free(spu); 976 mutex_unlock(&cbe_spu_info[node].list_mutex);
802 }
803 mutex_unlock(&spu_prio->active_mutex[node]);
804 } 977 }
805 kfree(spu_prio); 978 kfree(spu_prio);
806} 979}
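
spusched_thread() above keeps the familiar kthread structure: sleep in TASK_INTERRUPTIBLE until the timer-driven spusched_wake() kicks it, then walk each node's SPU list under list_mutex. A minimal sketch of that loop shape follows; demo_thread() and the thread name are made up, and the comment marks where the per-node scan would go.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *demo_task;

static int demo_thread(void *unused)
{
	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();		/* sleep until someone wakes us */
		/* ... scan the per-node lists under their mutexes ... */
	}
	return 0;
}

static int demo_start(void)
{
	demo_task = kthread_run(demo_thread, NULL, "demo_sched");
	return IS_ERR(demo_task) ? PTR_ERR(demo_task) : 0;
}

static void demo_stop(void)
{
	kthread_stop(demo_task);	/* wakes the thread and waits for exit */
}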
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c
index 4e19ed7a0756..21a9c952d88b 100644
--- a/arch/powerpc/platforms/cell/spufs/spu_restore.c
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -84,13 +84,13 @@ static inline void restore_decr(void)
84 unsigned int decr_running; 84 unsigned int decr_running;
85 unsigned int decr; 85 unsigned int decr;
86 86
87 /* Restore, Step 6: 87 /* Restore, Step 6(moved):
88 * If the LSCSA "decrementer running" flag is set 88 * If the LSCSA "decrementer running" flag is set
89 * then write the SPU_WrDec channel with the 89 * then write the SPU_WrDec channel with the
90 * decrementer value from LSCSA. 90 * decrementer value from LSCSA.
91 */ 91 */
92 offset = LSCSA_QW_OFFSET(decr_status); 92 offset = LSCSA_QW_OFFSET(decr_status);
93 decr_running = regs_spill[offset].slot[0]; 93 decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
94 if (decr_running) { 94 if (decr_running) {
95 offset = LSCSA_QW_OFFSET(decr); 95 offset = LSCSA_QW_OFFSET(decr);
96 decr = regs_spill[offset].slot[0]; 96 decr = regs_spill[offset].slot[0];
@@ -318,10 +318,10 @@ int main()
318 build_dma_list(lscsa_ea); /* Step 3. */ 318 build_dma_list(lscsa_ea); /* Step 3. */
319 restore_upper_240kb(lscsa_ea); /* Step 4. */ 319 restore_upper_240kb(lscsa_ea); /* Step 4. */
320 /* Step 5: done by 'exit'. */ 320 /* Step 5: done by 'exit'. */
321 restore_decr(); /* Step 6. */
322 enqueue_putllc(lscsa_ea); /* Step 7. */ 321 enqueue_putllc(lscsa_ea); /* Step 7. */
323 set_tag_update(); /* Step 8. */ 322 set_tag_update(); /* Step 8. */
324 read_tag_status(); /* Step 9. */ 323 read_tag_status(); /* Step 9. */
324 restore_decr(); /* moved Step 6. */
325 read_llar_status(); /* Step 10. */ 325 read_llar_status(); /* Step 10. */
326 write_ppu_mb(); /* Step 11. */ 326 write_ppu_mb(); /* Step 11. */
327 write_ppuint_mb(); /* Step 12. */ 327 write_ppuint_mb(); /* Step 12. */
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
index 15183d209b58..f383b027e8bf 100644
--- a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
100x24fd8081, 100x24fd8081,
110x1cd80081, 110x1cd80081,
120x33001180, 120x33001180,
130x42030003, 130x42034003,
140x33800284, 140x33800284,
150x1c010204, 150x1c010204,
160x40200000, 160x40200000,
@@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
240x23fffd84, 240x23fffd84,
250x1c100183, 250x1c100183,
260x217ffa85, 260x217ffa85,
270x3080a000, 270x3080b000,
280x3080a201, 280x3080b201,
290x3080a402, 290x3080b402,
300x3080a603, 300x3080b603,
310x3080a804, 310x3080b804,
320x3080aa05, 320x3080ba05,
330x3080ac06, 330x3080bc06,
340x3080ae07, 340x3080be07,
350x3080b008, 350x3080c008,
360x3080b209, 360x3080c209,
370x3080b40a, 370x3080c40a,
380x3080b60b, 380x3080c60b,
390x3080b80c, 390x3080c80c,
400x3080ba0d, 400x3080ca0d,
410x3080bc0e, 410x3080cc0e,
420x3080be0f, 420x3080ce0f,
430x00003ffc, 430x00003ffc,
440x00000000, 440x00000000,
450x00000000, 450x00000000,
@@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
480x3ec00083, 480x3ec00083,
490xb0a14103, 490xb0a14103,
500x01a00204, 500x01a00204,
510x3ec10082, 510x3ec10083,
520x4202800e, 520x4202c002,
530x04000703, 530xb0a14203,
540xb0a14202, 540x21a00802,
550x21a00803, 550x3fbf028a,
560x3fbf028d, 560x3f20050a,
570x3f20068d, 570x3fbe0502,
580x3fbe0682,
590x3fe30102, 580x3fe30102,
600x21a00882, 590x21a00882,
610x3f82028f, 600x3f82028b,
620x3fe3078f, 610x3fe3058b,
630x3fbf0784, 620x3fbf0584,
640x3f200204, 630x3f200204,
650x3fbe0204, 640x3fbe0204,
660x3fe30204, 650x3fe30204,
@@ -75,252 +74,285 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
750x21a00083, 740x21a00083,
760x40800082, 750x40800082,
770x21a00b02, 760x21a00b02,
780x10002818, 770x10002612,
790x42a00002, 780x42a00003,
800x32800007, 790x42074006,
810x4207000c, 800x1800c204,
820x18008208, 810x40a00008,
830x40a0000b, 820x40800789,
840x4080020a, 830x1c010305,
850x40800709, 840x34000302,
860x00200000,
870x42070002,
880x3ac30384,
890x1cffc489, 850x1cffc489,
900x00200000, 860x3ec00303,
910x18008383, 870x3ec00287,
920x38830382, 880xb0408403,
930x4cffc486, 890x24000302,
940x3ac28185, 900x34000282,
950xb0408584, 910x1c020306,
960x28830382, 920xb0408207,
970x1c020387, 930x18020204,
980x38828182, 940x24000282,
990xb0408405, 950x217ffa09,
1000x1802c408, 960x04000402,
1010x28828182, 970x21a00802,
1020x217ff886, 980x3fbe0504,
1030x04000583, 990x3fe30204,
1040x21a00803, 1000x21a00884,
1050x3fbe0682, 1010x42074002,
1060x3fe30102, 1020x21a00902,
1070x04000106, 1030x40803c03,
1080x21a00886, 1040x21a00983,
1090x04000603, 1050x04000485,
1100x21a00903, 1060x21a00a05,
1110x40803c02,
1120x21a00982,
1130x40800003,
1140x04000184,
1150x21a00a04,
1160x40802202, 1070x40802202,
1170x21a00a82, 1080x21a00a82,
1180x42028005, 1090x21a00805,
1190x34208702, 1100x21a00884,
1200x21002282, 1110x3fbf0582,
1210x21a00804,
1220x21a00886,
1230x3fbf0782,
1240x3f200102, 1120x3f200102,
1250x3fbe0102, 1130x3fbe0102,
1260x3fe30102, 1140x3fe30102,
1270x21a00902, 1150x21a00902,
1280x40804003, 1160x40804003,
1290x21a00983, 1170x21a00983,
1300x21a00a04, 1180x21a00a05,
1310x40805a02, 1190x40805a02,
1320x21a00a82, 1200x21a00a82,
1330x40800083, 1210x40800083,
1340x21a00b83, 1220x21a00b83,
1350x01a00c02, 1230x01a00c02,
1360x01a00d83, 1240x30809c03,
1370x3420c282, 1250x34000182,
1260x14004102,
1270x21002082,
1280x01a00d82,
1290x3080a003,
1300x34000182,
1380x21a00e02, 1310x21a00e02,
1390x34210283, 1320x3080a203,
1400x21a00f03, 1330x34000182,
1410x34200284, 1340x21a00f02,
1420x77400200, 1350x3080a403,
1430x3421c282, 1360x34000182,
1370x77400100,
1380x3080a603,
1390x34000182,
1440x21a00702, 1400x21a00702,
1450x34218283, 1410x3080a803,
1460x21a00083, 1420x34000182,
1470x34214282, 1430x21a00082,
1440x3080aa03,
1450x34000182,
1480x21a00b02, 1460x21a00b02,
1490x4200480c, 1470x4020007f,
1500x00200000, 1480x3080ae02,
1510x1c010286, 1490x42004805,
1520x34220284, 1500x3080ac04,
1530x34220302, 1510x34000103,
1540x0f608203, 1520x34000202,
1550x5c024204, 1530x1cffc183,
1560x3b81810b, 1540x3b810106,
1570x42013c02, 1550x0f608184,
1580x00200000, 1560x42013802,
1590x18008185, 1570x5c020183,
1600x38808183, 1580x38810102,
1610x3b814182, 1590x3b810102,
1620x21004e84, 1600x21000e83,
1630x4020007f, 1610x4020007f,
1640x35000100, 1620x35000100,
1650x000004e0, 1630x00000470,
1660x000002a0, 1640x000002f8,
1670x000002e8, 1650x00000430,
1680x00000428,
1690x00000360, 1660x00000360,
1700x000002e8, 1670x000002f8,
1710x000004a0,
1720x00000468,
1730x000003c8, 1680x000003c8,
1690x000004a8,
1700x00000298,
1740x00000360, 1710x00000360,
1720x00200000,
1750x409ffe02, 1730x409ffe02,
1760x30801203, 1740x30801203,
1770x40800204, 1750x40800208,
1780x3ec40085, 1760x3ec40084,
1790x10009c09, 1770x40800407,
1800x3ac10606, 1780x3ac20289,
1810xb060c105, 1790xb060c104,
1820x4020007f, 1800x3ac1c284,
1830x4020007f,
1840x20801203, 1810x20801203,
1850x38810602, 1820x38820282,
1860xb0408586, 1830x41004003,
1870x28810602, 1840xb0408189,
1880x32004180, 1850x28820282,
1890x34204702, 1860x3881c282,
1870xb0408304,
1880x2881c282,
1890x00400000,
1900x40800003,
1910x35000000,
1920x30809e03,
1930x34000182,
1900x21a00382, 1940x21a00382,
1910x4020007f, 1950x4020007f,
1920x327fdc80, 1960x327fde00,
1930x409ffe02, 1970x409ffe02,
1940x30801203, 1980x30801203,
1950x40800204, 1990x40800206,
1960x3ec40087, 2000x3ec40084,
1970x40800405, 2010x40800407,
1980x00200000, 2020x40800608,
1990x40800606, 2030x3ac1828a,
2000x3ac10608, 2040x3ac20289,
2010x3ac14609, 2050xb060c104,
2020x3ac1860a, 2060x3ac1c284,
2030xb060c107,
2040x20801203, 2070x20801203,
2080x38818282,
2050x41004003, 2090x41004003,
2060x38810602, 2100xb040818a,
2070x4020007f, 2110x10005b0b,
2080xb0408188, 2120x41201003,
2090x4020007f, 2130x28818282,
2100x28810602, 2140x3881c282,
2110x41201002, 2150xb0408184,
2120x38814603,
2130x10009c09,
2140xb060c109,
2150x4020007f,
2160x28814603,
2170x41193f83, 2160x41193f83,
2180x38818602,
2190x60ffc003, 2170x60ffc003,
2200xb040818a, 2180x2881c282,
2210x28818602, 2190x38820282,
2220x32003080, 2200xb0408189,
2210x28820282,
2220x327fef80,
2230x409ffe02, 2230x409ffe02,
2240x30801203, 2240x30801203,
2250x40800204, 2250x40800207,
2260x3ec40087, 2260x3ec40086,
2270x41201008, 2270x4120100b,
2280x10009c14, 2280x10005b14,
2290x40800405, 2290x40800404,
2300x3ac10609, 2300x3ac1c289,
2310x40800606, 2310x40800608,
2320x3ac1460a, 2320xb060c106,
2330xb060c107, 2330x3ac10286,
2340x3ac1860b, 2340x3ac2028a,
2350x20801203, 2350x20801203,
2360x38810602, 2360x3881c282,
2370xb0408409,
2380x28810602,
2390x38814603,
2400xb060c40a,
2410x4020007f,
2420x28814603,
2430x41193f83, 2370x41193f83,
2440x38818602,
2450x60ffc003, 2380x60ffc003,
2460xb040818b, 2390xb0408589,
2470x28818602, 2400x2881c282,
2480x32002380, 2410x38810282,
2490x409ffe02, 2420xb0408586,
2500x30801204, 2430x28810282,
2510x40800205, 2440x38820282,
2520x3ec40083, 2450xb040818a,
2530x40800406, 2460x28820282,
2540x3ac14607,
2550x3ac18608,
2560xb0810103,
2570x41004002,
2580x20801204,
2590x4020007f,
2600x38814603,
2610x10009c0b,
2620xb060c107,
2630x4020007f,
2640x4020007f,
2650x28814603,
2660x38818602,
2670x4020007f,
2680x4020007f, 2470x4020007f,
2690xb0408588, 2480x327fe280,
2700x28818602, 2490x409ffe02,
2500x30801203,
2510x40800207,
2520x3ec40084,
2530x40800408,
2540x10005b14,
2550x40800609,
2560x3ac1c28a,
2570x3ac2028b,
2580xb060c104,
2590x3ac24284,
2600x20801203,
2610x41201003,
2620x3881c282,
2630xb040830a,
2640x2881c282,
2650x38820282,
2660xb040818b,
2670x41193f83,
2680x60ffc003,
2690x28820282,
2700x38824282,
2710xb0408184,
2720x28824282,
2710x4020007f, 2730x4020007f,
2720x32001780, 2740x327fd580,
2730x409ffe02, 2750x409ffe02,
2740x1000640e, 2760x1000658e,
2750x40800204, 2770x40800206,
2760x30801203, 2780x30801203,
2770x40800405, 2790x40800407,
2780x3ec40087, 2800x3ec40084,
2790x40800606, 2810x40800608,
2800x3ac10608, 2820x3ac1828a,
2810x3ac14609, 2830x3ac20289,
2820x3ac1860a, 2840xb060c104,
2830xb060c107, 2850x3ac1c284,
2840x20801203, 2860x20801203,
2850x413d8003, 2870x413d8003,
2860x38810602, 2880x38818282,
2870x4020007f, 2890x4020007f,
2880x327fd780, 2900x327fd800,
2890x409ffe02, 2910x409ffe03,
2900x10007f0c, 2920x30801202,
2910x40800205, 2930x40800207,
2920x30801204, 2940x3ec40084,
2930x40800406, 2950x10005b09,
2940x3ec40083, 2960x3ac1c288,
2950x3ac14607, 2970xb0408184,
2960x3ac18608,
2970xb0810103,
2980x413d8002,
2990x20801204,
3000x38814603,
3010x4020007f, 2980x4020007f,
3020x327feb80, 2990x4020007f,
3000x20801202,
3010x3881c282,
3020xb0408308,
3030x2881c282,
3040x327fc680,
3030x409ffe02, 3050x409ffe02,
3060x1000588b,
3070x40800208,
3040x30801203, 3080x30801203,
3050x40800204, 3090x40800407,
3060x3ec40087, 3100x3ec40084,
3070x40800405, 3110x3ac20289,
3080x1000650a, 3120xb060c104,
3090x40800606, 3130x3ac1c284,
3100x3ac10608,
3110x3ac14609,
3120x3ac1860a,
3130xb060c107,
3140x20801203, 3140x20801203,
3150x38810602, 3150x413d8003,
3160xb0408588, 3160x38820282,
3170x4020007f, 3170x327fbd80,
3180x327fc980, 3180x00200000,
3190x00400000, 3190x00000da0,
3200x40800003, 3200x00000000,
3210x4020007f, 3210x00000000,
3220x35000000, 3220x00000000,
3230x00000d90,
3240x00000000,
3250x00000000,
3260x00000000,
3270x00000db0,
3280x00000000,
3290x00000000,
3300x00000000,
3310x00000dc0,
3320x00000000,
3330x00000000,
3340x00000000,
3350x00000d80,
3360x00000000,
3370x00000000,
3380x00000000,
3390x00000df0,
3400x00000000,
3410x00000000,
3420x00000000,
3430x00000de0,
3440x00000000,
3450x00000000,
3460x00000000,
3470x00000dd0,
3480x00000000,
3490x00000000,
3500x00000000,
3510x00000e04,
3520x00000000,
3530x00000000,
3230x00000000, 3540x00000000,
3550x00000e00,
3240x00000000, 3560x00000000,
3250x00000000, 3570x00000000,
3260x00000000, 3580x00000000,
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 08b3530288ac..8b20c0c1556f 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -40,17 +40,13 @@ enum {
40struct spu_context_ops; 40struct spu_context_ops;
41struct spu_gang; 41struct spu_gang;
42 42
43/* 43enum {
44 * This is the state for spu utilization reporting to userspace. 44 SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */
45 * Because this state is visible to userspace it must never change and needs 45};
46 * to be kept strictly separate from any internal state kept by the kernel. 46
47 */ 47/* ctx->sched_flags */
48enum spuctx_execution_state { 48enum {
49 SPUCTX_UTIL_USER = 0, 49 SPU_SCHED_NOTIFY_ACTIVE,
50 SPUCTX_UTIL_SYSTEM,
51 SPUCTX_UTIL_IOWAIT,
52 SPUCTX_UTIL_LOADED,
53 SPUCTX_UTIL_MAX
54}; 50};
55 51
56struct spu_context { 52struct spu_context {
@@ -89,6 +85,8 @@ struct spu_context {
89 85
90 struct list_head gang_list; 86 struct list_head gang_list;
91 struct spu_gang *gang; 87 struct spu_gang *gang;
88 struct kref *prof_priv_kref;
89 void ( * prof_priv_release) (struct kref *kref);
92 90
93 /* owner thread */ 91 /* owner thread */
94 pid_t tid; 92 pid_t tid;
@@ -104,9 +102,9 @@ struct spu_context {
104 /* statistics */ 102 /* statistics */
105 struct { 103 struct {
106 /* updates protected by ctx->state_mutex */ 104 /* updates protected by ctx->state_mutex */
107 enum spuctx_execution_state execution_state; 105 enum spu_utilization_state util_state;
108 unsigned long tstamp; /* time of last ctx switch */ 106 unsigned long long tstamp; /* time of last state switch */
109 unsigned long times[SPUCTX_UTIL_MAX]; 107 unsigned long long times[SPU_UTIL_MAX];
110 unsigned long long vol_ctx_switch; 108 unsigned long long vol_ctx_switch;
111 unsigned long long invol_ctx_switch; 109 unsigned long long invol_ctx_switch;
112 unsigned long long min_flt; 110 unsigned long long min_flt;
@@ -118,6 +116,10 @@ struct spu_context {
118 unsigned long long class2_intr_base; /* # at last ctx switch */ 116 unsigned long long class2_intr_base; /* # at last ctx switch */
119 unsigned long long libassist; 117 unsigned long long libassist;
120 } stats; 118 } stats;
119
120 struct list_head aff_list;
121 int aff_head;
122 int aff_offset;
121}; 123};
122 124
123struct spu_gang { 125struct spu_gang {
@@ -125,8 +127,19 @@ struct spu_gang {
125 struct mutex mutex; 127 struct mutex mutex;
126 struct kref kref; 128 struct kref kref;
127 int contexts; 129 int contexts;
130
131 struct spu_context *aff_ref_ctx;
132 struct list_head aff_list_head;
133 struct mutex aff_mutex;
134 int aff_flags;
135 struct spu *aff_ref_spu;
136 atomic_t aff_sched_count;
128}; 137};
129 138
139/* Flag bits for spu_gang aff_flags */
140#define AFF_OFFSETS_SET 1
141#define AFF_MERGED 2
142
130struct mfc_dma_command { 143struct mfc_dma_command {
131 int32_t pad; /* reserved */ 144 int32_t pad; /* reserved */
132 uint32_t lsa; /* local storage address */ 145 uint32_t lsa; /* local storage address */
@@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[];
190extern struct tree_descr spufs_dir_nosched_contents[]; 203extern struct tree_descr spufs_dir_nosched_contents[];
191 204
192/* system call implementation */ 205/* system call implementation */
193long spufs_run_spu(struct file *file, 206long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
194 struct spu_context *ctx, u32 *npc, u32 *status); 207long spufs_create(struct nameidata *nd, unsigned int flags,
195long spufs_create(struct nameidata *nd, 208 mode_t mode, struct file *filp);
196 unsigned int flags, mode_t mode);
197extern const struct file_operations spufs_context_fops; 209extern const struct file_operations spufs_context_fops;
198 210
199/* gang management */ 211/* gang management */
@@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
206/* fault handling */ 218/* fault handling */
207int spufs_handle_class1(struct spu_context *ctx); 219int spufs_handle_class1(struct spu_context *ctx);
208 220
221/* affinity */
222struct spu *affinity_check(struct spu_context *ctx);
223
209/* context management */ 224/* context management */
210extern atomic_t nr_spu_contexts; 225extern atomic_t nr_spu_contexts;
211static inline void spu_acquire(struct spu_context *ctx) 226static inline void spu_acquire(struct spu_context *ctx)
@@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx);
227void spu_forget(struct spu_context *ctx); 242void spu_forget(struct spu_context *ctx);
228int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); 243int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
229void spu_acquire_saved(struct spu_context *ctx); 244void spu_acquire_saved(struct spu_context *ctx);
245void spu_release_saved(struct spu_context *ctx);
230 246
231int spu_activate(struct spu_context *ctx, unsigned long flags); 247int spu_activate(struct spu_context *ctx, unsigned long flags);
232void spu_deactivate(struct spu_context *ctx); 248void spu_deactivate(struct spu_context *ctx);
233void spu_yield(struct spu_context *ctx); 249void spu_yield(struct spu_context *ctx);
250void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
234void spu_set_timeslice(struct spu_context *ctx); 251void spu_set_timeslice(struct spu_context *ctx);
235void spu_update_sched_info(struct spu_context *ctx); 252void spu_update_sched_info(struct spu_context *ctx);
236void __spu_update_sched_info(struct spu_context *ctx); 253void __spu_update_sched_info(struct spu_context *ctx);
237int __init spu_sched_init(void); 254int __init spu_sched_init(void);
238void __exit spu_sched_exit(void); 255void spu_sched_exit(void);
239 256
240extern char *isolated_loader; 257extern char *isolated_loader;
241 258
@@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes;
293 * line. 310 * line.
294 */ 311 */
295static inline void spuctx_switch_state(struct spu_context *ctx, 312static inline void spuctx_switch_state(struct spu_context *ctx,
296 enum spuctx_execution_state new_state) 313 enum spu_utilization_state new_state)
297{ 314{
298 WARN_ON(!mutex_is_locked(&ctx->state_mutex)); 315 unsigned long long curtime;
299 316 signed long long delta;
300 if (ctx->stats.execution_state != new_state) { 317 struct timespec ts;
301 unsigned long curtime = jiffies; 318 struct spu *spu;
302 319 enum spu_utilization_state old_state;
303 ctx->stats.times[ctx->stats.execution_state] +=
304 curtime - ctx->stats.tstamp;
305 ctx->stats.tstamp = curtime;
306 ctx->stats.execution_state = new_state;
307 }
308}
309 320
310static inline void spu_switch_state(struct spu *spu, 321 ktime_get_ts(&ts);
311 enum spuctx_execution_state new_state) 322 curtime = timespec_to_ns(&ts);
312{ 323 delta = curtime - ctx->stats.tstamp;
313 if (spu->stats.utilization_state != new_state) {
314 unsigned long curtime = jiffies;
315 324
316 spu->stats.times[spu->stats.utilization_state] += 325 WARN_ON(!mutex_is_locked(&ctx->state_mutex));
317 curtime - spu->stats.tstamp; 326 WARN_ON(delta < 0);
327
328 spu = ctx->spu;
329 old_state = ctx->stats.util_state;
330 ctx->stats.util_state = new_state;
331 ctx->stats.tstamp = curtime;
332
333 /*
334 * Update the physical SPU utilization statistics.
335 */
336 if (spu) {
337 ctx->stats.times[old_state] += delta;
338 spu->stats.times[old_state] += delta;
339 spu->stats.util_state = new_state;
318 spu->stats.tstamp = curtime; 340 spu->stats.tstamp = curtime;
319 spu->stats.utilization_state = new_state;
320 } 341 }
321} 342}
322 343
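
The new prof_priv_kref/prof_priv_release pair in struct spu_context lets a profiler attach reference-counted private data whose destructor runs when the last user drops it. That is the standard kref pattern; a minimal sketch with invented demo_priv names is shown below.

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/kernel.h>

struct demo_priv {			/* illustrative profiler-private data */
	struct kref kref;
	/* ... payload ... */
};

static void demo_priv_release(struct kref *kref)
{
	struct demo_priv *p = container_of(kref, struct demo_priv, kref);

	kfree(p);
}

static struct demo_priv *demo_priv_alloc(void)
{
	struct demo_priv *p = kzalloc(sizeof(*p), GFP_KERNEL);

	if (p)
		kref_init(&p->kref);	/* refcount starts at 1 */
	return p;
}

/* Drop one reference; demo_priv_release() runs when the count hits zero. */
static void demo_priv_put(struct demo_priv *p)
{
	kref_put(&p->kref, demo_priv_release);
}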
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index 9c506ba08cdc..27ffdae98e5a 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
180 case MFC_CNTL_SUSPEND_COMPLETE: 180 case MFC_CNTL_SUSPEND_COMPLETE:
181 if (csa) { 181 if (csa) {
182 csa->priv2.mfc_control_RW = 182 csa->priv2.mfc_control_RW =
183 in_be64(&priv2->mfc_control_RW) | 183 MFC_CNTL_SUSPEND_MASK |
184 MFC_CNTL_SUSPEND_DMA_QUEUE; 184 MFC_CNTL_SUSPEND_DMA_QUEUE;
185 } 185 }
186 break; 186 break;
@@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
190 MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == 190 MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
191 MFC_CNTL_SUSPEND_COMPLETE); 191 MFC_CNTL_SUSPEND_COMPLETE);
192 if (csa) { 192 if (csa) {
193 csa->priv2.mfc_control_RW = 193 csa->priv2.mfc_control_RW = 0;
194 in_be64(&priv2->mfc_control_RW) &
195 ~MFC_CNTL_SUSPEND_DMA_QUEUE;
196 } 194 }
197 break; 195 break;
198 } 196 }
@@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu)
251 * Read MFC_CNTL[Ds]. Update saved copy of 249 * Read MFC_CNTL[Ds]. Update saved copy of
252 * CSA.MFC_CNTL[Ds]. 250 * CSA.MFC_CNTL[Ds].
253 */ 251 */
254 if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { 252 csa->priv2.mfc_control_RW |=
255 csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; 253 in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING;
256 csa->suspend_time = get_cycles();
257 out_be64(&priv2->spu_chnlcntptr_RW, 7ULL);
258 eieio();
259 csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW);
260 eieio();
261 } else {
262 csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
263 }
264} 254}
265 255
266static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) 256static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
@@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
271 * Write MFC_CNTL[Dh] set to a '1' to halt 261 * Write MFC_CNTL[Dh] set to a '1' to halt
272 * the decrementer. 262 * the decrementer.
273 */ 263 */
274 out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); 264 out_be64(&priv2->mfc_control_RW,
265 MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
275 eieio(); 266 eieio();
276} 267}
277 268
@@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
615static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) 606static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
616{ 607{
617 struct spu_priv2 __iomem *priv2 = spu->priv2; 608 struct spu_priv2 __iomem *priv2 = spu->priv2;
618 u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 609 u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
619 int i; 610 int i;
620 611
621 /* Save, Step 42: 612 /* Save, Step 42:
@@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
626 csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); 617 csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
627 618
628 /* Save the following CH: [0,3,4,24,25,27] */ 619 /* Save the following CH: [0,3,4,24,25,27] */
629 for (i = 0; i < 7; i++) { 620 for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
630 idx = ch_indices[i]; 621 idx = ch_indices[i];
631 out_be64(&priv2->spu_chnlcntptr_RW, idx); 622 out_be64(&priv2->spu_chnlcntptr_RW, idx);
632 eieio(); 623 eieio();
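Aside on the ARRAY_SIZE() conversion in this and the later channel-handling hunks: the old declaration reserved seven array slots but listed only six channel indices, so the fixed "i < 7" loops touched a trailing zero entry (channel 0 a second time). Bounding the loop by ARRAY_SIZE() makes the iteration count follow the initializer list. A small stand-alone sketch of the difference, shown with the same idiom the kernel macro uses:

#include <stdio.h>

/* Same idiom as the kernel's ARRAY_SIZE(); reproduced here for illustration. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

int main(void)
{
	/* Old style: explicit size 7, but only six initializers. */
	unsigned long long fixed[7] = { 0, 3, 4, 24, 25, 27 };
	/* New style: size inferred from the initializer list. */
	unsigned long long sized[]  = { 0, 3, 4, 24, 25, 27 };

	/* A loop bounded by 7 visits a trailing zero entry (channel 0 again);
	 * a loop bounded by ARRAY_SIZE() visits exactly the listed channels. */
	printf("fixed: %zu entries, sized: %zu entries\n",
	       ARRAY_SIZE(fixed), ARRAY_SIZE(sized));
	return 0;
}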
@@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
983 */ 974 */
984} 975}
985 976
986static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) 977static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
978 struct spu *spu)
987{ 979{
988 struct spu_priv2 __iomem *priv2 = spu->priv2; 980 struct spu_priv2 __iomem *priv2 = spu->priv2;
989 981
990 /* Restore, Step 7: 982 /* Restore, Step 7:
991 * Restore, Step 47. 983 * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
992 * Write MFC_Cntl[Dh,Sc]='1','1' to suspend
993 * the queue and halt the decrementer. 984 * the queue and halt the decrementer.
994 */ 985 */
995 out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | 986 out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
@@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
1090static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) 1081static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
1091{ 1082{
1092 struct spu_priv2 __iomem *priv2 = spu->priv2; 1083 struct spu_priv2 __iomem *priv2 = spu->priv2;
1093 u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 1084 u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
1094 u64 idx; 1085 u64 idx;
1095 int i; 1086 int i;
1096 1087
@@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
1102 out_be64(&priv2->spu_chnldata_RW, 0UL); 1093 out_be64(&priv2->spu_chnldata_RW, 0UL);
1103 1094
1104 /* Reset the following CH: [0,3,4,24,25,27] */ 1095 /* Reset the following CH: [0,3,4,24,25,27] */
1105 for (i = 0; i < 7; i++) { 1096 for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
1106 idx = ch_indices[i]; 1097 idx = ch_indices[i];
1107 out_be64(&priv2->spu_chnlcntptr_RW, idx); 1098 out_be64(&priv2->spu_chnlcntptr_RW, idx);
1108 eieio(); 1099 eieio();
@@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu)
1289 cycles_t resume_time = get_cycles(); 1280 cycles_t resume_time = get_cycles();
1290 cycles_t delta_time = resume_time - csa->suspend_time; 1281 cycles_t delta_time = resume_time - csa->suspend_time;
1291 1282
1283 csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
1284 if (csa->lscsa->decr.slot[0] < delta_time) {
1285 csa->lscsa->decr_status.slot[0] |=
1286 SPU_DECR_STATUS_WRAPPED;
1287 }
1288
1292 csa->lscsa->decr.slot[0] -= delta_time; 1289 csa->lscsa->decr.slot[0] -= delta_time;
1290 } else {
1291 csa->lscsa->decr_status.slot[0] = 0;
1293 } 1292 }
1294} 1293}
1295 1294
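Note on the decrementer changes above: the decr_status slot is now a flag word rather than a 0/1 value. At restore time the elapsed cycles since suspend are subtracted from the saved decrementer, SPU_DECR_STATUS_RUNNING is always set for a running decrementer, and SPU_DECR_STATUS_WRAPPED is added when the elapsed time exceeds the saved value, which restore_decr_wrapped() later turns into a timer event. A rough stand-alone model of that adjustment, with placeholder flag values that are not the kernel's definitions:

#include <stdio.h>

/* Placeholder values standing in for SPU_DECR_STATUS_RUNNING/WRAPPED. */
#define DECR_STATUS_RUNNING 0x1
#define DECR_STATUS_WRAPPED 0x2

/*
 * Model of the restore-time adjustment: the time spent switched out is
 * subtracted from the saved decrementer, and a wrap is flagged when that
 * time exceeds the saved value.
 */
static unsigned int adjust_decr(unsigned int *decr, int was_running,
				unsigned int delta)
{
	unsigned int status = 0;

	if (was_running) {
		status = DECR_STATUS_RUNNING;
		if (*decr < delta)
			status |= DECR_STATUS_WRAPPED;
		*decr -= delta;	/* unsigned arithmetic wraps, as the HW would */
	}
	return status;
}

int main(void)
{
	unsigned int decr = 100;
	unsigned int status = adjust_decr(&decr, 1, 250);

	printf("decr=%u wrapped=%d\n", decr, !!(status & DECR_STATUS_WRAPPED));
	return 0;
}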
@@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
1398 send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); 1397 send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
1399} 1398}
1400 1399
1400static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
1401{
1402 struct spu_priv2 __iomem *priv2 = spu->priv2;
1403
1404 /* Restore, Step 47.
1405 * Write MFC_Cntl[Sc,Sm]='1','0' to suspend
1406 * the queue.
1407 */
1408 out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
1409 eieio();
1410}
1411
1401static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) 1412static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
1402{ 1413{
1403 /* Restore, Step 49: 1414 /* Restore, Step 49:
@@ -1548,10 +1559,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
1548 * "wrapped" flag is set, OR in a '1' to 1559 * "wrapped" flag is set, OR in a '1' to
1549 * CSA.SPU_Event_Status[Tm]. 1560 * CSA.SPU_Event_Status[Tm].
1550 */ 1561 */
1551 if (csa->lscsa->decr_status.slot[0] == 1) { 1562 if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
1552 csa->spu_chnldata_RW[0] |= 0x20; 1563 csa->spu_chnldata_RW[0] |= 0x20;
1553 } 1564 }
1554 if ((csa->lscsa->decr_status.slot[0] == 1) && 1565 if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) &&
1555 (csa->spu_chnlcnt_RW[0] == 0 && 1566 (csa->spu_chnlcnt_RW[0] == 0 &&
1556 ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && 1567 ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) &&
1557 ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { 1568 ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) {
@@ -1562,18 +1573,13 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
1562static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) 1573static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
1563{ 1574{
1564 struct spu_priv2 __iomem *priv2 = spu->priv2; 1575 struct spu_priv2 __iomem *priv2 = spu->priv2;
1565 u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 1576 u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
1566 int i; 1577 int i;
1567 1578
1568 /* Restore, Step 59: 1579 /* Restore, Step 59:
1580 * Restore the following CH: [0,3,4,24,25,27]
1569 */ 1581 */
1570 1582 for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
1571 /* Restore CH 1 without count */
1572 out_be64(&priv2->spu_chnlcntptr_RW, 1);
1573 out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]);
1574
1575 /* Restore the following CH: [0,3,4,24,25,27] */
1576 for (i = 0; i < 7; i++) {
1577 idx = ch_indices[i]; 1583 idx = ch_indices[i];
1578 out_be64(&priv2->spu_chnlcntptr_RW, idx); 1584 out_be64(&priv2->spu_chnlcntptr_RW, idx);
1579 eieio(); 1585 eieio();
@@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
1932 set_switch_pending(prev, spu); /* Step 5. */ 1938 set_switch_pending(prev, spu); /* Step 5. */
1933 stop_spu_isolate(spu); /* NEW. */ 1939 stop_spu_isolate(spu); /* NEW. */
1934 remove_other_spu_access(prev, spu); /* Step 6. */ 1940 remove_other_spu_access(prev, spu); /* Step 6. */
1935 suspend_mfc(prev, spu); /* Step 7. */ 1941 suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */
1936 wait_suspend_mfc_complete(prev, spu); /* Step 8. */ 1942 wait_suspend_mfc_complete(prev, spu); /* Step 8. */
1937 if (!suspend_spe(prev, spu)) /* Step 9. */ 1943 if (!suspend_spe(prev, spu)) /* Step 9. */
1938 clear_spu_status(prev, spu); /* Step 10. */ 1944 clear_spu_status(prev, spu); /* Step 10. */
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 8e37bdf4dfda..43f0fb88abbc 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
47 goto out; 47 goto out;
48 48
49 i = SPUFS_I(filp->f_path.dentry->d_inode); 49 i = SPUFS_I(filp->f_path.dentry->d_inode);
50 ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); 50 ret = spufs_run_spu(i->i_ctx, &npc, &status);
51 51
52 if (put_user(npc, unpc)) 52 if (put_user(npc, unpc))
53 ret = -EFAULT; 53 ret = -EFAULT;
@@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
76} 76}
77#endif 77#endif
78 78
79asmlinkage long sys_spu_create(const char __user *pathname, 79asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
80 unsigned int flags, mode_t mode) 80 mode_t mode, struct file *neighbor)
81{ 81{
82 char *tmp; 82 char *tmp;
83 int ret; 83 int ret;
@@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname,
90 ret = path_lookup(tmp, LOOKUP_PARENT| 90 ret = path_lookup(tmp, LOOKUP_PARENT|
91 LOOKUP_OPEN|LOOKUP_CREATE, &nd); 91 LOOKUP_OPEN|LOOKUP_CREATE, &nd);
92 if (!ret) { 92 if (!ret) {
93 ret = spufs_create(&nd, flags, mode); 93 ret = spufs_create(&nd, flags, mode, neighbor);
94 path_release(&nd); 94 path_release(&nd);
95 } 95 }
96 putname(tmp); 96 putname(tmp);
@@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname,
99 return ret; 99 return ret;
100} 100}
101 101
102#ifndef MODULE
103asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
104 mode_t mode, int neighbor_fd)
105{
106 int fput_needed;
107 struct file *neighbor;
108 long ret;
109
110 if (flags & SPU_CREATE_AFFINITY_SPU) {
111 ret = -EBADF;
112 neighbor = fget_light(neighbor_fd, &fput_needed);
113 if (neighbor) {
114 ret = do_spu_create(pathname, flags, mode, neighbor);
115 fput_light(neighbor, fput_needed);
116 }
117 }
118 else {
119 ret = do_spu_create(pathname, flags, mode, NULL);
120 }
121
122 return ret;
123}
124#endif
125
102struct spufs_calls spufs_calls = { 126struct spufs_calls spufs_calls = {
103 .create_thread = sys_spu_create, 127 .create_thread = do_spu_create,
104 .spu_run = do_spu_run, 128 .spu_run = do_spu_run,
105 .owner = THIS_MODULE, 129 .owner = THIS_MODULE,
106}; 130};
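Usage note for the syscall change above: sys_spu_create() now carries a neighbor file descriptor that is only consulted when SPU_CREATE_AFFINITY_SPU is set, so a caller creates the first context normally and then passes its fd when creating the context that should be placed near it. A hedged user-space sketch of that calling pattern; the syscall number, flag value, and spufs paths below are placeholders for illustration, not authoritative ABI values, and in a real build they come from the kernel headers of this tree:

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Placeholders; real definitions come from asm/unistd.h and asm/spu.h. */
#ifndef __NR_spu_create
#define __NR_spu_create		279	/* illustrative only */
#endif
#ifndef SPU_CREATE_AFFINITY_SPU
#define SPU_CREATE_AFFINITY_SPU	0x0010	/* illustrative only */
#endif

static long spu_create(const char *path, unsigned int flags, mode_t mode,
		       int neighbor_fd)
{
	/* The fourth argument is ignored unless an affinity flag is set. */
	return syscall(__NR_spu_create, path, flags, mode, neighbor_fd);
}

int main(void)
{
	/* Assumes spufs is mounted at /spu; paths are made up for the sketch. */
	long first = spu_create("/spu/ctx0", 0, 0755, -1);
	long second;

	if (first < 0) {
		perror("spu_create");
		return 1;
	}

	/* Ask for the second context to be placed near the first one. */
	second = spu_create("/spu/ctx1", SPU_CREATE_AFFINITY_SPU, 0755,
			    (int)first);
	if (second < 0)
		perror("spu_create (affinity)");
	return 0;
}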
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index f65078c3d3b3..484eb4e0e9db 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_QUICC_ENGINE) += qe_lib/
17mv64x60-$(CONFIG_PCI) += mv64x60_pci.o 17mv64x60-$(CONFIG_PCI) += mv64x60_pci.o
18obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o 18obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o
19obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o 19obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o
20obj-$(CONFIG_AXON_RAM) += axonram.o
20 21
21# contains only the suspend handler for time 22# contains only the suspend handler for time
22ifeq ($(CONFIG_RTC_CLASS),) 23ifeq ($(CONFIG_RTC_CLASS),)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
new file mode 100644
index 000000000000..2326d5dc5752
--- /dev/null
+++ b/arch/powerpc/sysdev/axonram.c
@@ -0,0 +1,381 @@
1/*
2 * (C) Copyright IBM Deutschland Entwicklung GmbH 2006
3 *
4 * Author: Maxim Shchetynin <maxim@de.ibm.com>
5 *
6 * Axon DDR2 device driver.
7 * It registers one block device per Axon's DDR2 memory bank found on a system.
8 * Block devices are called axonram?, their major and minor numbers are
9 * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2, or (at your option)
14 * any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 */
25
26#include <linux/bio.h>
27#include <linux/blkdev.h>
28#include <linux/buffer_head.h>
29#include <linux/device.h>
30#include <linux/errno.h>
31#include <linux/fs.h>
32#include <linux/genhd.h>
33#include <linux/interrupt.h>
34#include <linux/io.h>
35#include <linux/ioport.h>
36#include <linux/irq.h>
37#include <linux/irqreturn.h>
38#include <linux/kernel.h>
39#include <linux/mm.h>
40#include <linux/mod_devicetable.h>
41#include <linux/module.h>
42#include <linux/slab.h>
43#include <linux/string.h>
44#include <linux/types.h>
45#include <asm/of_device.h>
46#include <asm/of_platform.h>
47#include <asm/page.h>
48#include <asm/prom.h>
49
50#define AXON_RAM_MODULE_NAME "axonram"
51#define AXON_RAM_DEVICE_NAME "axonram"
52#define AXON_RAM_MINORS_PER_DISK 16
53#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT
54#define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT
55#define AXON_RAM_SECTOR_SHIFT 9
56#define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT
57#define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING
58
59struct axon_ram_bank {
60 struct of_device *device;
61 struct gendisk *disk;
62 unsigned int irq_correctable;
63 unsigned int irq_uncorrectable;
64 unsigned long ph_addr;
65 unsigned long io_addr;
66 unsigned long size;
67 unsigned long ecc_counter;
68};
69
70static ssize_t
71axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
72{
73 struct of_device *device = to_of_device(dev);
74 struct axon_ram_bank *bank = device->dev.platform_data;
75
76 BUG_ON(!bank);
77
78 return sprintf(buf, "%ld\n", bank->ecc_counter);
79}
80
81static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
82
83/**
84 * axon_ram_irq_handler - interrupt handler for Axon RAM ECC
85 * @irq: interrupt ID
86 * @dev: pointer to of_device
87 */
88static irqreturn_t
89axon_ram_irq_handler(int irq, void *dev)
90{
91 struct of_device *device = dev;
92 struct axon_ram_bank *bank = device->dev.platform_data;
93
94 BUG_ON(!bank);
95
96 if (irq == bank->irq_correctable) {
97 dev_err(&device->dev, "Correctable memory error occured\n");
98 bank->ecc_counter++;
99 return IRQ_HANDLED;
100 } else if (irq == bank->irq_uncorrectable) {
101 dev_err(&device->dev, "Uncorrectable memory error occured\n");
102 panic("Critical ECC error on %s", device->node->full_name);
103 }
104
105 return IRQ_NONE;
106}
107
108/**
109 * axon_ram_make_request - make_request() method for block device
110 * @queue, @bio: see blk_queue_make_request()
111 */
112static int
113axon_ram_make_request(struct request_queue *queue, struct bio *bio)
114{
115 struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
116 unsigned long phys_mem, phys_end;
117 void *user_mem;
118 struct bio_vec *vec;
119 unsigned int transfered;
120 unsigned short idx;
121 int rc = 0;
122
123 phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
124 phys_end = bank->io_addr + bank->size;
125 transfered = 0;
126 bio_for_each_segment(vec, bio, idx) {
127 if (unlikely(phys_mem + vec->bv_len > phys_end)) {
128 bio_io_error(bio, bio->bi_size);
129 rc = -ERANGE;
130 break;
131 }
132
133 user_mem = page_address(vec->bv_page) + vec->bv_offset;
134 if (bio_data_dir(bio) == READ)
135 memcpy(user_mem, (void *) phys_mem, vec->bv_len);
136 else
137 memcpy((void *) phys_mem, user_mem, vec->bv_len);
138
139 phys_mem += vec->bv_len;
140 transfered += vec->bv_len;
141 }
142 bio_endio(bio, transfered, 0);
143
144 return rc;
145}
146
147/**
148 * axon_ram_direct_access - direct_access() method for block device
149 * @device, @sector, @data: see block_device_operations method
150 */
151static int
152axon_ram_direct_access(struct block_device *device, sector_t sector,
153 unsigned long *data)
154{
155 struct axon_ram_bank *bank = device->bd_disk->private_data;
156 loff_t offset;
157
158 offset = sector << AXON_RAM_SECTOR_SHIFT;
159 if (offset >= bank->size) {
160 dev_err(&bank->device->dev, "Access outside of address space\n");
161 return -ERANGE;
162 }
163
164 *data = bank->ph_addr + offset;
165
166 return 0;
167}
168
169static struct block_device_operations axon_ram_devops = {
170 .owner = THIS_MODULE,
171 .direct_access = axon_ram_direct_access
172};
173
174/**
175 * axon_ram_probe - probe() method for platform driver
176 * @device, @device_id: see of_platform_driver method
177 */
178static int
179axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
180{
181 static int axon_ram_bank_id = -1;
182 struct axon_ram_bank *bank;
183 struct resource resource;
184 int rc = 0;
185
186 axon_ram_bank_id++;
187
188 dev_info(&device->dev, "Found memory controller on %s\n",
189 device->node->full_name);
190
191 bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
192 if (bank == NULL) {
193 dev_err(&device->dev, "Out of memory\n");
194 rc = -ENOMEM;
195 goto failed;
196 }
197
198 device->dev.platform_data = bank;
199
200 bank->device = device;
201
202 if (of_address_to_resource(device->node, 0, &resource) != 0) {
203 dev_err(&device->dev, "Cannot access device tree\n");
204 rc = -EFAULT;
205 goto failed;
206 }
207
208 bank->size = resource.end - resource.start + 1;
209
210 if (bank->size == 0) {
211 dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
212 AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
213 rc = -ENODEV;
214 goto failed;
215 }
216
217 dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
218 AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
219
220 bank->ph_addr = resource.start;
221 bank->io_addr = (unsigned long) ioremap_flags(
222 bank->ph_addr, bank->size, _PAGE_NO_CACHE);
223 if (bank->io_addr == 0) {
224 dev_err(&device->dev, "ioremap() failed\n");
225 rc = -EFAULT;
226 goto failed;
227 }
228
229 bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
230 if (bank->disk == NULL) {
231 dev_err(&device->dev, "Cannot register disk\n");
232 rc = -EFAULT;
233 goto failed;
234 }
235
236 bank->disk->first_minor = 0;
237 bank->disk->fops = &axon_ram_devops;
238 bank->disk->private_data = bank;
239 bank->disk->driverfs_dev = &device->dev;
240
241 sprintf(bank->disk->disk_name, "%s%d",
242 AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
243 bank->disk->major = register_blkdev(0, bank->disk->disk_name);
244 if (bank->disk->major < 0) {
245 dev_err(&device->dev, "Cannot register block device\n");
246 rc = -EFAULT;
247 goto failed;
248 }
249
250 bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
251 if (bank->disk->queue == NULL) {
252 dev_err(&device->dev, "Cannot register disk queue\n");
253 rc = -EFAULT;
254 goto failed;
255 }
256
257 set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
258 blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
259 blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
260 add_disk(bank->disk);
261
262 bank->irq_correctable = irq_of_parse_and_map(device->node, 0);
263 bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1);
264 if ((bank->irq_correctable <= 0) || (bank->irq_uncorrectable <= 0)) {
265 dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
266 rc = -EFAULT;
267 goto failed;
268 }
269
270 rc = request_irq(bank->irq_correctable, axon_ram_irq_handler,
271 AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
272 if (rc != 0) {
273 dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
274 bank->irq_correctable = bank->irq_uncorrectable = 0;
275 rc = -EFAULT;
276 goto failed;
277 }
278
279 rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler,
280 AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
281 if (rc != 0) {
282 dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
283 bank->irq_uncorrectable = 0;
284 rc = -EFAULT;
285 goto failed;
286 }
287
288 rc = device_create_file(&device->dev, &dev_attr_ecc);
289 if (rc != 0) {
290 dev_err(&device->dev, "Cannot create sysfs file\n");
291 rc = -EFAULT;
292 goto failed;
293 }
294
295 return 0;
296
297failed:
298 if (bank != NULL) {
299 if (bank->irq_uncorrectable > 0)
300 free_irq(bank->irq_uncorrectable, device);
301 if (bank->irq_correctable > 0)
302 free_irq(bank->irq_correctable, device);
303 if (bank->disk != NULL) {
304 if (bank->disk->queue != NULL)
305 blk_cleanup_queue(bank->disk->queue);
306 if (bank->disk->major > 0)
307 unregister_blkdev(bank->disk->major,
308 bank->disk->disk_name);
309 del_gendisk(bank->disk);
310 }
311 device->dev.platform_data = NULL;
312 if (bank->io_addr != 0)
313 iounmap((void __iomem *) bank->io_addr);
314 kfree(bank);
315 }
316
317 return rc;
318}
319
320/**
321 * axon_ram_remove - remove() method for platform driver
322 * @device: see of_platform_driver method
323 */
324static int
325axon_ram_remove(struct of_device *device)
326{
327 struct axon_ram_bank *bank = device->dev.platform_data;
328
329 BUG_ON(!bank || !bank->disk);
330
331 device_remove_file(&device->dev, &dev_attr_ecc);
332 free_irq(bank->irq_uncorrectable, device);
333 free_irq(bank->irq_correctable, device);
334 blk_cleanup_queue(bank->disk->queue);
335 unregister_blkdev(bank->disk->major, bank->disk->disk_name);
336 del_gendisk(bank->disk);
337 iounmap((void __iomem *) bank->io_addr);
338 kfree(bank);
339
340 return 0;
341}
342
343static struct of_device_id axon_ram_device_id[] = {
344 {
345 .type = "dma-memory"
346 },
347 {}
348};
349
350static struct of_platform_driver axon_ram_driver = {
351 .owner = THIS_MODULE,
352 .name = AXON_RAM_MODULE_NAME,
353 .match_table = axon_ram_device_id,
354 .probe = axon_ram_probe,
355 .remove = axon_ram_remove
356};
357
358/**
359 * axon_ram_init
360 */
361static int __init
362axon_ram_init(void)
363{
364 return of_register_platform_driver(&axon_ram_driver);
365}
366
367/**
368 * axon_ram_exit
369 */
370static void __exit
371axon_ram_exit(void)
372{
373 of_unregister_platform_driver(&axon_ram_driver);
374}
375
376module_init(axon_ram_init);
377module_exit(axon_ram_exit);
378
379MODULE_LICENSE("GPL");
380MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
381MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
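Note on the new driver above: both axon_ram_make_request() and axon_ram_direct_access() treat the DDR2 bank as one flat window, shifting a 512-byte sector index into a byte offset and rejecting anything past the bank size before touching memory. A small stand-alone sketch of that address math; the names are illustrative, not the driver's own:

#include <stdio.h>

#define SECTOR_SHIFT 9			/* matches AXON_RAM_SECTOR_SHIFT */

struct bank {
	unsigned long base;		/* start of the memory window */
	unsigned long size;		/* bank size in bytes */
};

/* Translate a sector number into an address inside the bank, or 0 on error. */
static unsigned long sector_to_addr(const struct bank *b,
				    unsigned long long sector)
{
	unsigned long long offset = sector << SECTOR_SHIFT;

	if (offset >= b->size)		/* reject access past the bank */
		return 0;
	return b->base + (unsigned long)offset;
}

int main(void)
{
	struct bank b = { .base = 0x80000000UL, .size = 1UL << 30 };	/* 1 GB */

	printf("sector 8 -> %#lx\n", sector_to_addr(&b, 8));
	printf("past end -> %#lx\n", sector_to_addr(&b, b.size >> SECTOR_SHIFT));
	return 0;
}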
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
index 85a7c99c1003..2f91b55b7754 100644
--- a/arch/powerpc/sysdev/pmi.c
+++ b/arch/powerpc/sysdev/pmi.c
@@ -48,15 +48,13 @@ struct pmi_data {
48 struct work_struct work; 48 struct work_struct work;
49}; 49};
50 50
51static struct pmi_data *data;
51 52
52static int pmi_irq_handler(int irq, void *dev_id) 53static int pmi_irq_handler(int irq, void *dev_id)
53{ 54{
54 struct pmi_data *data;
55 u8 type; 55 u8 type;
56 int rc; 56 int rc;
57 57
58 data = dev_id;
59
60 spin_lock(&data->pmi_spinlock); 58 spin_lock(&data->pmi_spinlock);
61 59
62 type = ioread8(data->pmi_reg + PMI_READ_TYPE); 60 type = ioread8(data->pmi_reg + PMI_READ_TYPE);
@@ -111,16 +109,13 @@ MODULE_DEVICE_TABLE(of, pmi_match);
111 109
112static void pmi_notify_handlers(struct work_struct *work) 110static void pmi_notify_handlers(struct work_struct *work)
113{ 111{
114 struct pmi_data *data;
115 struct pmi_handler *handler; 112 struct pmi_handler *handler;
116 113
117 data = container_of(work, struct pmi_data, work);
118
119 spin_lock(&data->handler_spinlock); 114 spin_lock(&data->handler_spinlock);
120 list_for_each_entry(handler, &data->handler, node) { 115 list_for_each_entry(handler, &data->handler, node) {
121 pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler); 116 pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler);
122 if (handler->type == data->msg.type) 117 if (handler->type == data->msg.type)
123 handler->handle_pmi_message(data->dev, data->msg); 118 handler->handle_pmi_message(data->msg);
124 } 119 }
125 spin_unlock(&data->handler_spinlock); 120 spin_unlock(&data->handler_spinlock);
126} 121}
@@ -129,9 +124,14 @@ static int pmi_of_probe(struct of_device *dev,
129 const struct of_device_id *match) 124 const struct of_device_id *match)
130{ 125{
131 struct device_node *np = dev->node; 126 struct device_node *np = dev->node;
132 struct pmi_data *data;
133 int rc; 127 int rc;
134 128
129 if (data) {
130 printk(KERN_ERR "pmi: driver has already been initialized.\n");
131 rc = -EBUSY;
132 goto out;
133 }
134
135 data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL); 135 data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
136 if (!data) { 136 if (!data) {
137 printk(KERN_ERR "pmi: could not allocate memory.\n"); 137 printk(KERN_ERR "pmi: could not allocate memory.\n");
@@ -154,7 +154,6 @@ static int pmi_of_probe(struct of_device *dev,
154 154
155 INIT_WORK(&data->work, pmi_notify_handlers); 155 INIT_WORK(&data->work, pmi_notify_handlers);
156 156
157 dev->dev.driver_data = data;
158 data->dev = dev; 157 data->dev = dev;
159 158
160 data->irq = irq_of_parse_and_map(np, 0); 159 data->irq = irq_of_parse_and_map(np, 0);
@@ -164,7 +163,7 @@ static int pmi_of_probe(struct of_device *dev,
164 goto error_cleanup_iomap; 163 goto error_cleanup_iomap;
165 } 164 }
166 165
167 rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data); 166 rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
168 if (rc) { 167 if (rc) {
169 printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n", 168 printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
170 data->irq, rc); 169 data->irq, rc);
@@ -187,12 +186,9 @@ out:
187 186
188static int pmi_of_remove(struct of_device *dev) 187static int pmi_of_remove(struct of_device *dev)
189{ 188{
190 struct pmi_data *data;
191 struct pmi_handler *handler, *tmp; 189 struct pmi_handler *handler, *tmp;
192 190
193 data = dev->dev.driver_data; 191 free_irq(data->irq, NULL);
194
195 free_irq(data->irq, data);
196 iounmap(data->pmi_reg); 192 iounmap(data->pmi_reg);
197 193
198 spin_lock(&data->handler_spinlock); 194 spin_lock(&data->handler_spinlock);
@@ -202,7 +198,8 @@ static int pmi_of_remove(struct of_device *dev)
202 198
203 spin_unlock(&data->handler_spinlock); 199 spin_unlock(&data->handler_spinlock);
204 200
205 kfree(dev->dev.driver_data); 201 kfree(data);
202 data = NULL;
206 203
207 return 0; 204 return 0;
208} 205}
@@ -226,13 +223,13 @@ static void __exit pmi_module_exit(void)
226} 223}
227module_exit(pmi_module_exit); 224module_exit(pmi_module_exit);
228 225
229void pmi_send_message(struct of_device *device, pmi_message_t msg) 226int pmi_send_message(pmi_message_t msg)
230{ 227{
231 struct pmi_data *data;
232 unsigned long flags; 228 unsigned long flags;
233 DECLARE_COMPLETION_ONSTACK(completion); 229 DECLARE_COMPLETION_ONSTACK(completion);
234 230
235 data = device->dev.driver_data; 231 if (!data)
232 return -ENODEV;
236 233
237 mutex_lock(&data->msg_mutex); 234 mutex_lock(&data->msg_mutex);
238 235
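Note on the pmi.c hunks in this file: the driver now keeps a single static pmi_data instance, so the exported calls drop their of_device argument and a handler's callback receives only the message; pmi_notify_handlers() still dispatches by matching the handler's type against the message type. A user-space model of that dispatch semantics, with made-up types and values purely for illustration (not the kernel's pmi structures):

#include <stdio.h>

struct pmi_msg {
	unsigned char type;
	unsigned char data1;
	unsigned char data2;
};

struct pmi_handler {
	unsigned char type;			/* message type of interest */
	void (*handle)(struct pmi_msg msg);	/* new API: message only    */
};

static struct pmi_handler *handlers[8];
static int nr_handlers;

/* Stand-in for pmi_register_handler(): no of_device argument any more. */
static int register_handler(struct pmi_handler *h)
{
	if (nr_handlers >= 8)
		return -1;
	handlers[nr_handlers++] = h;
	return 0;
}

/* Stand-in for pmi_notify_handlers(): call every handler of matching type. */
static void notify(struct pmi_msg msg)
{
	int i;

	for (i = 0; i < nr_handlers; i++)
		if (handlers[i]->type == msg.type)
			handlers[i]->handle(msg);
}

static void on_message(struct pmi_msg msg)
{
	printf("pmi message: type=%#x data1=%#x\n", msg.type, msg.data1);
}

int main(void)
{
	struct pmi_handler h = { .type = 0x42, .handle = on_message };
	struct pmi_msg msg = { .type = 0x42, .data1 = 3, .data2 = 0 };

	register_handler(&h);
	notify(msg);
	return 0;
}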
@@ -256,30 +253,26 @@ void pmi_send_message(struct of_device *device, pmi_message_t msg)
256 data->completion = NULL; 253 data->completion = NULL;
257 254
258 mutex_unlock(&data->msg_mutex); 255 mutex_unlock(&data->msg_mutex);
256
257 return 0;
259} 258}
260EXPORT_SYMBOL_GPL(pmi_send_message); 259EXPORT_SYMBOL_GPL(pmi_send_message);
261 260
262void pmi_register_handler(struct of_device *device, 261int pmi_register_handler(struct pmi_handler *handler)
263 struct pmi_handler *handler)
264{ 262{
265 struct pmi_data *data;
266 data = device->dev.driver_data;
267
268 if (!data) 263 if (!data)
269 return; 264 return -ENODEV;
270 265
271 spin_lock(&data->handler_spinlock); 266 spin_lock(&data->handler_spinlock);
272 list_add_tail(&handler->node, &data->handler); 267 list_add_tail(&handler->node, &data->handler);
273 spin_unlock(&data->handler_spinlock); 268 spin_unlock(&data->handler_spinlock);
269
270 return 0;
274} 271}
275EXPORT_SYMBOL_GPL(pmi_register_handler); 272EXPORT_SYMBOL_GPL(pmi_register_handler);
276 273
277void pmi_unregister_handler(struct of_device *device, 274void pmi_unregister_handler(struct pmi_handler *handler)
278 struct pmi_handler *handler)
279{ 275{
280 struct pmi_data *data;
281 data = device->dev.driver_data;
282
283 if (!data) 276 if (!data)
284 return; 277 return;
285 278