diff options
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/Makefile | 49 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_restore.c | 336 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S | 116 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_save.c | 195 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_save_crt0.S | 102 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/spu_utils.h | 160 |
6 files changed, 958 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index e70e3cc1158f..b38ab747efd7 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile | |||
@@ -2,4 +2,53 @@ obj-$(CONFIG_SPU_FS) += spufs.o | |||
2 | 2 | ||
3 | spufs-y += inode.o file.o context.o switch.o syscalls.o | 3 | spufs-y += inode.o file.o context.o switch.o syscalls.o |
4 | 4 | ||
5 | # Rules to build switch.o with the help of SPU tool chain | ||
# Build chain expressed by the pattern rules below:
#   spu_%.c / spu_%.S -> spu_%.o -> spu_% (linked) -> spu_%.bin -> spu_%_dump.h
# switch.o lists the two generated *_dump.h headers as prerequisites.
6 | SPU_CROSS := spu- | ||
7 | SPU_CC := $(SPU_CROSS)gcc | ||
8 | SPU_AS := $(SPU_CROSS)gcc | ||
9 | SPU_LD := $(SPU_CROSS)ld | ||
10 | SPU_OBJCOPY := $(SPU_CROSS)objcopy | ||
11 | SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -I$(objtree)/include2 | ||
12 | SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -I$(objtree)/include2 | ||
13 | SPU_LDFLAGS := -N -Ttext=0x0 | ||
14 | |||
5 | $(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h | 15 | $(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h |
16 | |||
17 | # Compile SPU files | ||
18 | cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $< | ||
19 | quiet_cmd_spu_cc = SPU_CC $@ | ||
20 | $(obj)/spu_%.o: $(src)/spu_%.c | ||
21 | $(call if_changed,spu_cc) | ||
22 | |||
23 | # Assemble SPU files | ||
24 | cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $< | ||
25 | quiet_cmd_spu_as = SPU_AS $@ | ||
26 | $(obj)/spu_%.o: $(src)/spu_%.S | ||
27 | $(call if_changed,spu_as) | ||
28 | |||
29 | # Link SPU Executables | ||
# Each executable is linked from its crt0 object plus the matching C object.
30 | cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^ | ||
31 | quiet_cmd_spu_ld = SPU_LD $@ | ||
32 | $(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o | ||
33 | $(call if_changed,spu_ld) | ||
34 | |||
35 | # Copy into binary format | ||
# NOTE(review): the prerequisite below is $(src)/spu_%, but the link rule
# above creates $(obj)/spu_%.  These paths presumably differ for
# out-of-tree (O=) builds -- confirm whether $(obj)/spu_% was intended.
36 | cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@ | ||
37 | quiet_cmd_spu_objcopy = OBJCOPY $@ | ||
38 | $(obj)/spu_%.bin: $(src)/spu_% | ||
39 | $(call if_changed,spu_objcopy) | ||
40 | |||
41 | # create C code from ELF executable | ||
# The generated header defines "static unsigned int <stem>_code[]" holding
# the .bin contents as 32-bit words, four per output line.
# NOTE(review): hexdump formats 4-byte words in the build host's byte
# order -- confirm this matches what switch.c expects when cross-building.
42 | cmd_hexdump = ( \ | ||
43 | echo "/*" ; \ | ||
44 | echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \ | ||
45 | echo " * Hex-dump auto generated from $*.c." ; \ | ||
46 | echo " * Do not edit!" ; \ | ||
47 | echo " */" ; \ | ||
48 | echo "static unsigned int $*_code[] __page_aligned = {" ; \ | ||
49 | hexdump -v -e '4/4 "0x%08x, " "\n"' $< ; \ | ||
50 | echo "};" ; \ | ||
51 | ) > $@ | ||
52 | quiet_cmd_hexdump = HEXDUMP $@ | ||
53 | $(obj)/%_dump.h: $(obj)/%.bin | ||
54 | $(call if_changed,hexdump) | ||
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c new file mode 100644 index 000000000000..0bf723dcd677 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c | |||
@@ -0,0 +1,336 @@ | |||
1 | /* | ||
2 | * spu_restore.c | ||
3 | * | ||
4 | * (C) Copyright IBM Corp. 2005 | ||
5 | * | ||
6 | * SPU-side context restore sequence outlined in | ||
7 | * Synergistic Processor Element Book IV | ||
8 | * | ||
9 | * Author: Mark Nutter <mnutter@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | |||
28 | #ifndef LS_SIZE | ||
29 | #define LS_SIZE 0x40000 /* 256K (in bytes) */ | ||
30 | #endif | ||
31 | |||
32 | typedef unsigned int u32; | ||
33 | typedef unsigned long long u64; | ||
34 | |||
35 | #include <spu_intrinsics.h> | ||
36 | #include <asm/spu_csa.h> | ||
37 | #include "spu_utils.h" | ||
38 | |||
/*
 * Raw SPU instruction words.  restore_complete() stores these into the
 * code at exit_fini so that the final stop state matches the saved
 * "stopped status"; the mnemonic for each word is given alongside.
 */
39 | #define BR_INSTR 0x327fff80 /* br -4 */ | ||
40 | #define NOP_INSTR 0x40200000 /* nop */ | ||
41 | #define HEQ_INSTR 0x7b000000 /* heq $0, $0 */ | ||
42 | #define STOP_INSTR 0x00000000 /* stop 0x0 */ | ||
43 | #define ILLEGAL_INSTR 0x00800000 /* illegal instr */ | ||
44 | #define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */ | ||
45 | |||
46 | static inline void fetch_regs_from_mem(addr64 lscsa_ea) | ||
47 | { | ||
48 | unsigned int ls = (unsigned int)®s_spill[0]; | ||
49 | unsigned int size = sizeof(regs_spill); | ||
50 | unsigned int tag_id = 0; | ||
51 | unsigned int cmd = 0x40; /* GET */ | ||
52 | |||
53 | spu_writech(MFC_LSA, ls); | ||
54 | spu_writech(MFC_EAH, lscsa_ea.ui[0]); | ||
55 | spu_writech(MFC_EAL, lscsa_ea.ui[1]); | ||
56 | spu_writech(MFC_Size, size); | ||
57 | spu_writech(MFC_TagID, tag_id); | ||
58 | spu_writech(MFC_Cmd, cmd); | ||
59 | } | ||
60 | |||
61 | static inline void restore_upper_240kb(addr64 lscsa_ea) | ||
62 | { | ||
63 | unsigned int ls = 16384; | ||
64 | unsigned int list = (unsigned int)&dma_list[0]; | ||
65 | unsigned int size = sizeof(dma_list); | ||
66 | unsigned int tag_id = 0; | ||
67 | unsigned int cmd = 0x44; /* GETL */ | ||
68 | |||
69 | /* Restore, Step 4: | ||
70 | * Enqueue the GETL command (tag 0) to the MFC SPU command | ||
71 | * queue to transfer the upper 240 kb of LS from CSA. | ||
72 | */ | ||
73 | spu_writech(MFC_LSA, ls); | ||
74 | spu_writech(MFC_EAH, lscsa_ea.ui[0]); | ||
75 | spu_writech(MFC_EAL, list); | ||
76 | spu_writech(MFC_Size, size); | ||
77 | spu_writech(MFC_TagID, tag_id); | ||
78 | spu_writech(MFC_Cmd, cmd); | ||
79 | } | ||
80 | |||
81 | static inline void restore_decr(void) | ||
82 | { | ||
83 | unsigned int offset; | ||
84 | unsigned int decr_running; | ||
85 | unsigned int decr; | ||
86 | |||
87 | /* Restore, Step 6: | ||
88 | * If the LSCSA "decrementer running" flag is set | ||
89 | * then write the SPU_WrDec channel with the | ||
90 | * decrementer value from LSCSA. | ||
91 | */ | ||
92 | offset = LSCSA_QW_OFFSET(decr_status); | ||
93 | decr_running = regs_spill[offset].slot[0]; | ||
94 | if (decr_running) { | ||
95 | offset = LSCSA_QW_OFFSET(decr); | ||
96 | decr = regs_spill[offset].slot[0]; | ||
97 | spu_writech(SPU_WrDec, decr); | ||
98 | } | ||
99 | } | ||
100 | |||
101 | static inline void write_ppu_mb(void) | ||
102 | { | ||
103 | unsigned int offset; | ||
104 | unsigned int data; | ||
105 | |||
106 | /* Restore, Step 11: | ||
107 | * Write the MFC_WrOut_MB channel with the PPU_MB | ||
108 | * data from LSCSA. | ||
109 | */ | ||
110 | offset = LSCSA_QW_OFFSET(ppu_mb); | ||
111 | data = regs_spill[offset].slot[0]; | ||
112 | spu_writech(SPU_WrOutMbox, data); | ||
113 | } | ||
114 | |||
115 | static inline void write_ppuint_mb(void) | ||
116 | { | ||
117 | unsigned int offset; | ||
118 | unsigned int data; | ||
119 | |||
120 | /* Restore, Step 12: | ||
121 | * Write the MFC_WrInt_MB channel with the PPUINT_MB | ||
122 | * data from LSCSA. | ||
123 | */ | ||
124 | offset = LSCSA_QW_OFFSET(ppuint_mb); | ||
125 | data = regs_spill[offset].slot[0]; | ||
126 | spu_writech(SPU_WrOutIntrMbox, data); | ||
127 | } | ||
128 | |||
129 | static inline void restore_fpcr(void) | ||
130 | { | ||
131 | unsigned int offset; | ||
132 | vector unsigned int fpcr; | ||
133 | |||
134 | /* Restore, Step 13: | ||
135 | * Restore the floating-point status and control | ||
136 | * register from the LSCSA. | ||
137 | */ | ||
138 | offset = LSCSA_QW_OFFSET(fpcr); | ||
139 | fpcr = regs_spill[offset].v; | ||
140 | spu_mtfpscr(fpcr); | ||
141 | } | ||
142 | |||
143 | static inline void restore_srr0(void) | ||
144 | { | ||
145 | unsigned int offset; | ||
146 | unsigned int srr0; | ||
147 | |||
148 | /* Restore, Step 14: | ||
149 | * Restore the SPU SRR0 data from the LSCSA. | ||
150 | */ | ||
151 | offset = LSCSA_QW_OFFSET(srr0); | ||
152 | srr0 = regs_spill[offset].slot[0]; | ||
153 | spu_writech(SPU_WrSRR0, srr0); | ||
154 | } | ||
155 | |||
156 | static inline void restore_event_mask(void) | ||
157 | { | ||
158 | unsigned int offset; | ||
159 | unsigned int event_mask; | ||
160 | |||
161 | /* Restore, Step 15: | ||
162 | * Restore the SPU_RdEventMsk data from the LSCSA. | ||
163 | */ | ||
164 | offset = LSCSA_QW_OFFSET(event_mask); | ||
165 | event_mask = regs_spill[offset].slot[0]; | ||
166 | spu_writech(SPU_WrEventMask, event_mask); | ||
167 | } | ||
168 | |||
169 | static inline void restore_tag_mask(void) | ||
170 | { | ||
171 | unsigned int offset; | ||
172 | unsigned int tag_mask; | ||
173 | |||
174 | /* Restore, Step 16: | ||
175 | * Restore the SPU_RdTagMsk data from the LSCSA. | ||
176 | */ | ||
177 | offset = LSCSA_QW_OFFSET(tag_mask); | ||
178 | tag_mask = regs_spill[offset].slot[0]; | ||
179 | spu_writech(MFC_WrTagMask, tag_mask); | ||
180 | } | ||
181 | |||
182 | static inline void restore_complete(void) | ||
183 | { | ||
184 | extern void exit_fini(void); | ||
185 | unsigned int *exit_instrs = (unsigned int *)exit_fini; | ||
186 | unsigned int offset; | ||
187 | unsigned int stopped_status; | ||
188 | unsigned int stopped_code; | ||
189 | |||
190 | /* Restore, Step 18: | ||
191 | * Issue a stop-and-signal instruction with | ||
192 | * "good context restore" signal value. | ||
193 | * | ||
194 | * Restore, Step 19: | ||
195 | * There may be additional instructions placed | ||
196 | * here by the PPE Sequence for SPU Context | ||
197 | * Restore in order to restore the correct | ||
198 | * "stopped state". | ||
199 | * | ||
200 | * This step is handled here by analyzing the | ||
201 | * LSCSA.stopped_status and then modifying the | ||
202 | * exit() function to behave appropriately. | ||
203 | */ | ||
204 | |||
205 | offset = LSCSA_QW_OFFSET(stopped_status); | ||
206 | stopped_status = regs_spill[offset].slot[0]; | ||
207 | stopped_code = regs_spill[offset].slot[1]; | ||
208 | |||
209 | switch (stopped_status) { | ||
210 | case SPU_STOPPED_STATUS_P_I: | ||
211 | /* SPU_Status[P,I]=1. Add illegal instruction | ||
212 | * followed by stop-and-signal instruction after | ||
213 | * end of restore code. | ||
214 | */ | ||
215 | exit_instrs[0] = RESTORE_COMPLETE; | ||
216 | exit_instrs[1] = ILLEGAL_INSTR; | ||
217 | exit_instrs[2] = STOP_INSTR | stopped_code; | ||
218 | break; | ||
219 | case SPU_STOPPED_STATUS_P_H: | ||
220 | /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed | ||
221 | * by stop-and-signal instruction after end of | ||
222 | * restore code. | ||
223 | */ | ||
224 | exit_instrs[0] = RESTORE_COMPLETE; | ||
225 | exit_instrs[1] = HEQ_INSTR; | ||
226 | exit_instrs[2] = STOP_INSTR | stopped_code; | ||
227 | break; | ||
228 | case SPU_STOPPED_STATUS_S_P: | ||
229 | /* SPU_Status[S,P]=1. Add nop instruction | ||
230 | * followed by 'br -4' after end of restore | ||
231 | * code. | ||
232 | */ | ||
233 | exit_instrs[0] = RESTORE_COMPLETE; | ||
234 | exit_instrs[1] = STOP_INSTR | stopped_code; | ||
235 | exit_instrs[2] = NOP_INSTR; | ||
236 | exit_instrs[3] = BR_INSTR; | ||
237 | break; | ||
238 | case SPU_STOPPED_STATUS_S_I: | ||
239 | /* SPU_Status[S,I]=1. Add illegal instruction | ||
240 | * followed by 'br -4' after end of restore code. | ||
241 | */ | ||
242 | exit_instrs[0] = RESTORE_COMPLETE; | ||
243 | exit_instrs[1] = ILLEGAL_INSTR; | ||
244 | exit_instrs[2] = NOP_INSTR; | ||
245 | exit_instrs[3] = BR_INSTR; | ||
246 | break; | ||
247 | case SPU_STOPPED_STATUS_I: | ||
248 | /* SPU_Status[I]=1. Add illegal instruction followed | ||
249 | * by infinite loop after end of restore sequence. | ||
250 | */ | ||
251 | exit_instrs[0] = RESTORE_COMPLETE; | ||
252 | exit_instrs[1] = ILLEGAL_INSTR; | ||
253 | exit_instrs[2] = NOP_INSTR; | ||
254 | exit_instrs[3] = BR_INSTR; | ||
255 | break; | ||
256 | case SPU_STOPPED_STATUS_S: | ||
257 | /* SPU_Status[S]=1. Add two 'nop' instructions. */ | ||
258 | exit_instrs[0] = RESTORE_COMPLETE; | ||
259 | exit_instrs[1] = NOP_INSTR; | ||
260 | exit_instrs[2] = NOP_INSTR; | ||
261 | exit_instrs[3] = BR_INSTR; | ||
262 | break; | ||
263 | case SPU_STOPPED_STATUS_H: | ||
264 | /* SPU_Status[H]=1. Add 'heq $0, $0' instruction | ||
265 | * after end of restore code. | ||
266 | */ | ||
267 | exit_instrs[0] = RESTORE_COMPLETE; | ||
268 | exit_instrs[1] = HEQ_INSTR; | ||
269 | exit_instrs[2] = NOP_INSTR; | ||
270 | exit_instrs[3] = BR_INSTR; | ||
271 | break; | ||
272 | case SPU_STOPPED_STATUS_P: | ||
273 | /* SPU_Status[P]=1. Add stop-and-signal instruction | ||
274 | * after end of restore code. | ||
275 | */ | ||
276 | exit_instrs[0] = RESTORE_COMPLETE; | ||
277 | exit_instrs[1] = STOP_INSTR | stopped_code; | ||
278 | break; | ||
279 | case SPU_STOPPED_STATUS_R: | ||
280 | /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */ | ||
281 | exit_instrs[0] = RESTORE_COMPLETE; | ||
282 | exit_instrs[1] = NOP_INSTR; | ||
283 | exit_instrs[2] = NOP_INSTR; | ||
284 | exit_instrs[3] = BR_INSTR; | ||
285 | break; | ||
286 | default: | ||
287 | /* SPU_Status[R]=1. No additonal instructions. */ | ||
288 | break; | ||
289 | } | ||
290 | spu_sync(); | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * main - entry point for SPU-side context restore. | ||
295 | * | ||
296 | * This code deviates from the documented sequence in the | ||
297 | * following aspects: | ||
298 | * | ||
299 | * 1. The EA for LSCSA is passed from PPE in the | ||
300 | * signal notification channels. | ||
301 | * 2. The register spill area is pulled by SPU | ||
302 | * into LS, rather than pushed by PPE. | ||
303 | * 3. All 128 registers are restored by exit(). | ||
304 | * 4. The exit() function is modified at run | ||
305 | * time in order to properly restore the | ||
306 | * SPU_Status register. | ||
307 | */ | ||
308 | int main() | ||
309 | { | ||
310 | addr64 lscsa_ea; | ||
311 | |||
312 | lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); | ||
313 | lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); | ||
314 | fetch_regs_from_mem(lscsa_ea); | ||
315 | |||
316 | set_event_mask(); /* Step 1. */ | ||
317 | set_tag_mask(); /* Step 2. */ | ||
318 | build_dma_list(lscsa_ea); /* Step 3. */ | ||
319 | restore_upper_240kb(lscsa_ea); /* Step 4. */ | ||
320 | /* Step 5: done by 'exit'. */ | ||
321 | restore_decr(); /* Step 6. */ | ||
322 | enqueue_putllc(lscsa_ea); /* Step 7. */ | ||
323 | set_tag_update(); /* Step 8. */ | ||
324 | read_tag_status(); /* Step 9. */ | ||
325 | read_llar_status(); /* Step 10. */ | ||
326 | write_ppu_mb(); /* Step 11. */ | ||
327 | write_ppuint_mb(); /* Step 12. */ | ||
328 | restore_fpcr(); /* Step 13. */ | ||
329 | restore_srr0(); /* Step 14. */ | ||
330 | restore_event_mask(); /* Step 15. */ | ||
331 | restore_tag_mask(); /* Step 16. */ | ||
332 | /* Step 17. done by 'exit'. */ | ||
333 | restore_complete(); /* Step 18. */ | ||
334 | |||
335 | return 0; | ||
336 | } | ||
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S new file mode 100644 index 000000000000..2905949debe1 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S | |||
@@ -0,0 +1,116 @@ | |||
1 | /* | ||
2 | * crt0_r.S: Entry function for SPU-side context restore. | ||
3 | * | ||
4 | * Copyright (C) 2005 IBM | ||
5 | * | ||
6 | * Entry and exit function for SPU-side of the context restore | ||
7 | * sequence. Sets up an initial stack frame, then branches to | ||
8 | * 'main'. On return, restores all 128 registers from the LSCSA | ||
9 | * and exits. | ||
10 | * | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License as published by | ||
14 | * the Free Software Foundation; either version 2, or (at your option) | ||
15 | * any later version. | ||
16 | * | ||
17 | * This program is distributed in the hope that it will be useful, | ||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | * GNU General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with this program; if not, write to the Free Software | ||
24 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
25 | */ | ||
26 | |||
27 | #include <asm/spu_csa.h> | ||
28 | |||
/* Zero-filled, globally visible buffer of SIZEOF_SPU_SPILL_REGS bytes.
 * The C code accesses it as regs_spill[]; the exit code below reloads
 * the GPRs from it.
 * NOTE(review): ".align 7" presumably requests 2^7 = 128-byte
 * alignment with this assembler -- confirm.
 */
29 | .data | ||
30 | .align 7 | ||
31 | .globl regs_spill | ||
32 | regs_spill: | ||
33 | .space SIZEOF_SPU_SPILL_REGS, 0x0 | ||
34 | |||
/* Entry point: set up a stack, call main, and on return continue
 * straight into the 'exit' code that follows (there is no branch
 * between the call and the 'exit' label).
 */
35 | .text | ||
36 | .global _start | ||
37 | _start: | ||
38 | /* Initialize the stack pointer to point to 16368 | ||
39 | * (16kb-16). The back chain pointer is initialized | ||
40 | * to NULL. | ||
41 | */ | ||
42 | il $0, 0 | ||
43 | il $SP, 16368 | ||
44 | stqd $0, 0($SP) | ||
45 | |||
46 | /* Allocate a minimum stack frame for the called main. | ||
47 | * This is needed so that main has a place to save the | ||
48 | * link register when it calls another function. | ||
49 | */ | ||
50 | stqd $SP, -160($SP) | ||
51 | ai $SP, $SP, -160 | ||
52 | |||
53 | /* Call the program's main function. */ | ||
54 | brsl $0, main | ||
55 | |||
/* exit/_exit: reload all GPRs from regs_spill.  Registers 16 and up
 * are handled first by a self-modifying loop; registers 0-15 are
 * loaded last, with absolute addressing, because $3, $4 and $5 are
 * used as scratch by the loop.
 */
56 | .global exit | ||
57 | .global _exit | ||
58 | exit: | ||
59 | _exit: | ||
60 | /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */ | ||
/* $3 walks the spill area starting at the slot for register 16
 * (offset 256 = 16 registers * 16 bytes).
 */
61 | ila $3, regs_spill + 256 | ||
62 | restore_regs: | ||
/* $4 receives the quadword holding the four lqd instructions below. */
63 | lqr $4, restore_reg_insts | ||
64 | restore_reg_loop: | ||
/* NOTE(review): adding 4 to each instruction word presumably bumps
 * the target-register field of all four lqd instructions by 4 for
 * the next pass -- confirm against the SPU instruction encoding.
 */
65 | ai $4, $4, 4 | ||
66 | .balignl 16, 0x40200000 | ||
67 | restore_reg_insts: /* must be quad-word aligned. */ | ||
68 | lqd $16, 0($3) | ||
69 | lqd $17, 16($3) | ||
70 | lqd $18, 32($3) | ||
71 | lqd $19, 48($3) | ||
/* $5 = low 7 bits of the updated instruction word; the loop ends
 * when they reach zero.  The updated instructions are written back
 * over restore_reg_insts and $3 advances by four quadwords.
 */
72 | andi $5, $4, 0x7F | ||
73 | stqr $4, restore_reg_insts | ||
74 | ai $3, $3, 64 | ||
75 | brnz $5, restore_reg_loop | ||
76 | |||
77 | /* SPU Context Restore Step 17: Restore the first 16 GPRs. */ | ||
78 | lqa $0, regs_spill + 0 | ||
79 | lqa $1, regs_spill + 16 | ||
80 | lqa $2, regs_spill + 32 | ||
81 | lqa $3, regs_spill + 48 | ||
82 | lqa $4, regs_spill + 64 | ||
83 | lqa $5, regs_spill + 80 | ||
84 | lqa $6, regs_spill + 96 | ||
85 | lqa $7, regs_spill + 112 | ||
86 | lqa $8, regs_spill + 128 | ||
87 | lqa $9, regs_spill + 144 | ||
88 | lqa $10, regs_spill + 160 | ||
89 | lqa $11, regs_spill + 176 | ||
90 | lqa $12, regs_spill + 192 | ||
91 | lqa $13, regs_spill + 208 | ||
92 | lqa $14, regs_spill + 224 | ||
93 | lqa $15, regs_spill + 240 | ||
94 | |||
95 | /* Under normal circumstances, the 'exit' function | ||
96 | * terminates with 'stop SPU_RESTORE_COMPLETE', | ||
97 | * indicating that the SPU-side restore code has | ||
98 | * completed. | ||
99 | * | ||
100 | * However it is possible that instructions immediately | ||
101 | * following the 'stop 0x3ffc' have been modified at run | ||
102 | * time so as to recreate the exact SPU_Status settings | ||
103 | * from the application, e.g. illegal instruction, halt, | ||
104 | * etc. | ||
105 | */ | ||
/* restore_complete() in spu_restore.c overwrites up to four
 * instruction words starting at exit_fini; the four stop
 * instructions below reserve that space.
 */
106 | .global exit_fini | ||
107 | .global _exit_fini | ||
108 | exit_fini: | ||
109 | _exit_fini: | ||
110 | stop SPU_RESTORE_COMPLETE | ||
111 | stop 0 | ||
112 | stop 0 | ||
113 | stop 0 | ||
114 | |||
115 | /* Pad the size of this crt0.o to be multiple of 16 bytes. */ | ||
116 | .balignl 16, 0x0 | ||
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c new file mode 100644 index 000000000000..196033b8a579 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c | |||
@@ -0,0 +1,195 @@ | |||
1 | /* | ||
2 | * spu_save.c | ||
3 | * | ||
4 | * (C) Copyright IBM Corp. 2005 | ||
5 | * | ||
6 | * SPU-side context save sequence outlined in | ||
7 | * Synergistic Processor Element Book IV | ||
8 | * | ||
9 | * Author: Mark Nutter <mnutter@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | |||
28 | #ifndef LS_SIZE | ||
29 | #define LS_SIZE 0x40000 /* 256K (in bytes) */ | ||
30 | #endif | ||
31 | |||
32 | typedef unsigned int u32; | ||
33 | typedef unsigned long long u64; | ||
34 | |||
35 | #include <spu_intrinsics.h> | ||
36 | #include <asm/spu_csa.h> | ||
37 | #include "spu_utils.h" | ||
38 | |||
39 | static inline void save_event_mask(void) | ||
40 | { | ||
41 | unsigned int offset; | ||
42 | |||
43 | /* Save, Step 2: | ||
44 | * Read the SPU_RdEventMsk channel and save to the LSCSA. | ||
45 | */ | ||
46 | offset = LSCSA_QW_OFFSET(event_mask); | ||
47 | regs_spill[offset].slot[0] = spu_readch(SPU_RdEventStatMask); | ||
48 | } | ||
49 | |||
50 | static inline void save_tag_mask(void) | ||
51 | { | ||
52 | unsigned int offset; | ||
53 | |||
54 | /* Save, Step 3: | ||
55 | * Read the SPU_RdTagMsk channel and save to the LSCSA. | ||
56 | */ | ||
57 | offset = LSCSA_QW_OFFSET(tag_mask); | ||
58 | regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask); | ||
59 | } | ||
60 | |||
61 | static inline void save_upper_240kb(addr64 lscsa_ea) | ||
62 | { | ||
63 | unsigned int ls = 16384; | ||
64 | unsigned int list = (unsigned int)&dma_list[0]; | ||
65 | unsigned int size = sizeof(dma_list); | ||
66 | unsigned int tag_id = 0; | ||
67 | unsigned int cmd = 0x24; /* PUTL */ | ||
68 | |||
69 | /* Save, Step 7: | ||
70 | * Enqueue the PUTL command (tag 0) to the MFC SPU command | ||
71 | * queue to transfer the remaining 240 kb of LS to CSA. | ||
72 | */ | ||
73 | spu_writech(MFC_LSA, ls); | ||
74 | spu_writech(MFC_EAH, lscsa_ea.ui[0]); | ||
75 | spu_writech(MFC_EAL, list); | ||
76 | spu_writech(MFC_Size, size); | ||
77 | spu_writech(MFC_TagID, tag_id); | ||
78 | spu_writech(MFC_Cmd, cmd); | ||
79 | } | ||
80 | |||
81 | static inline void save_fpcr(void) | ||
82 | { | ||
83 | // vector unsigned int fpcr; | ||
84 | unsigned int offset; | ||
85 | |||
86 | /* Save, Step 9: | ||
87 | * Issue the floating-point status and control register | ||
88 | * read instruction, and save to the LSCSA. | ||
89 | */ | ||
90 | offset = LSCSA_QW_OFFSET(fpcr); | ||
91 | regs_spill[offset].v = spu_mffpscr(); | ||
92 | } | ||
93 | |||
94 | static inline void save_decr(void) | ||
95 | { | ||
96 | unsigned int offset; | ||
97 | |||
98 | /* Save, Step 10: | ||
99 | * Read and save the SPU_RdDec channel data to | ||
100 | * the LSCSA. | ||
101 | */ | ||
102 | offset = LSCSA_QW_OFFSET(decr); | ||
103 | regs_spill[offset].slot[0] = spu_readch(SPU_RdDec); | ||
104 | } | ||
105 | |||
106 | static inline void save_srr0(void) | ||
107 | { | ||
108 | unsigned int offset; | ||
109 | |||
110 | /* Save, Step 11: | ||
111 | * Read and save the SPU_WSRR0 channel data to | ||
112 | * the LSCSA. | ||
113 | */ | ||
114 | offset = LSCSA_QW_OFFSET(srr0); | ||
115 | regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0); | ||
116 | } | ||
117 | |||
118 | static inline void spill_regs_to_mem(addr64 lscsa_ea) | ||
119 | { | ||
120 | unsigned int ls = (unsigned int)®s_spill[0]; | ||
121 | unsigned int size = sizeof(regs_spill); | ||
122 | unsigned int tag_id = 0; | ||
123 | unsigned int cmd = 0x20; /* PUT */ | ||
124 | |||
125 | /* Save, Step 13: | ||
126 | * Enqueue a PUT command (tag 0) to send the LSCSA | ||
127 | * to the CSA. | ||
128 | */ | ||
129 | spu_writech(MFC_LSA, ls); | ||
130 | spu_writech(MFC_EAH, lscsa_ea.ui[0]); | ||
131 | spu_writech(MFC_EAL, lscsa_ea.ui[1]); | ||
132 | spu_writech(MFC_Size, size); | ||
133 | spu_writech(MFC_TagID, tag_id); | ||
134 | spu_writech(MFC_Cmd, cmd); | ||
135 | } | ||
136 | |||
137 | static inline void enqueue_sync(addr64 lscsa_ea) | ||
138 | { | ||
139 | unsigned int tag_id = 0; | ||
140 | unsigned int cmd = 0xCC; | ||
141 | |||
142 | /* Save, Step 14: | ||
143 | * Enqueue an MFC_SYNC command (tag 0). | ||
144 | */ | ||
145 | spu_writech(MFC_TagID, tag_id); | ||
146 | spu_writech(MFC_Cmd, cmd); | ||
147 | } | ||
148 | |||
149 | static inline void save_complete(void) | ||
150 | { | ||
151 | /* Save, Step 18: | ||
152 | * Issue a stop-and-signal instruction indicating | ||
153 | * "save complete". Note: This function will not | ||
154 | * return!! | ||
155 | */ | ||
156 | spu_stop(SPU_SAVE_COMPLETE); | ||
157 | } | ||
158 | |||
159 | /** | ||
160 | * main - entry point for SPU-side context save. | ||
161 | * | ||
162 | * This code deviates from the documented sequence as follows: | ||
163 | * | ||
164 | * 1. The EA for LSCSA is passed from PPE in the | ||
165 | * signal notification channels. | ||
166 | * 2. All 128 registers are saved by crt0.o. | ||
167 | */ | ||
168 | int main() | ||
169 | { | ||
170 | addr64 lscsa_ea; | ||
171 | |||
172 | lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); | ||
173 | lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); | ||
174 | |||
175 | /* Step 1: done by exit(). */ | ||
176 | save_event_mask(); /* Step 2. */ | ||
177 | save_tag_mask(); /* Step 3. */ | ||
178 | set_event_mask(); /* Step 4. */ | ||
179 | set_tag_mask(); /* Step 5. */ | ||
180 | build_dma_list(lscsa_ea); /* Step 6. */ | ||
181 | save_upper_240kb(lscsa_ea); /* Step 7. */ | ||
182 | /* Step 8: done by exit(). */ | ||
183 | save_fpcr(); /* Step 9. */ | ||
184 | save_decr(); /* Step 10. */ | ||
185 | save_srr0(); /* Step 11. */ | ||
186 | enqueue_putllc(lscsa_ea); /* Step 12. */ | ||
187 | spill_regs_to_mem(lscsa_ea); /* Step 13. */ | ||
188 | enqueue_sync(lscsa_ea); /* Step 14. */ | ||
189 | set_tag_update(); /* Step 15. */ | ||
190 | read_tag_status(); /* Step 16. */ | ||
191 | read_llar_status(); /* Step 17. */ | ||
192 | save_complete(); /* Step 18. */ | ||
193 | |||
194 | return 0; | ||
195 | } | ||
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S new file mode 100644 index 000000000000..6659d6a66faa --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * crt0_s.S: Entry function for SPU-side context save. | ||
3 | * | ||
4 | * Copyright (C) 2005 IBM | ||
5 | * | ||
6 | * Entry function for SPU-side of the context save sequence. | ||
7 | * Saves all 128 GPRs, sets up an initial stack frame, then | ||
8 | * branches to 'main'. | ||
9 | * | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | */ | ||
25 | |||
26 | #include <asm/spu_csa.h> | ||
27 | |||
/* Zero-filled, globally visible buffer of SIZEOF_SPU_SPILL_REGS bytes.
 * _start stores the GPRs here and the C code accesses it as
 * regs_spill[].
 * NOTE(review): ".align 7" presumably requests 2^7 = 128-byte
 * alignment with this assembler -- confirm.
 */
28 | .data | ||
29 | .align 7 | ||
30 | .globl regs_spill | ||
31 | regs_spill: | ||
32 | .space SIZEOF_SPU_SPILL_REGS, 0x0 | ||
33 | |||
/* Entry point: store every GPR into regs_spill before any register
 * is modified, then set up a stack and call main.  Registers 0-15
 * are stored first with absolute addressing, which frees $3, $4 and
 * $5 for use as scratch by the self-modifying loop that stores the
 * rest.
 */
34 | .text | ||
35 | .global _start | ||
36 | _start: | ||
37 | /* SPU Context Save Step 1: Save the first 16 GPRs. */ | ||
38 | stqa $0, regs_spill + 0 | ||
39 | stqa $1, regs_spill + 16 | ||
40 | stqa $2, regs_spill + 32 | ||
41 | stqa $3, regs_spill + 48 | ||
42 | stqa $4, regs_spill + 64 | ||
43 | stqa $5, regs_spill + 80 | ||
44 | stqa $6, regs_spill + 96 | ||
45 | stqa $7, regs_spill + 112 | ||
46 | stqa $8, regs_spill + 128 | ||
47 | stqa $9, regs_spill + 144 | ||
48 | stqa $10, regs_spill + 160 | ||
49 | stqa $11, regs_spill + 176 | ||
50 | stqa $12, regs_spill + 192 | ||
51 | stqa $13, regs_spill + 208 | ||
52 | stqa $14, regs_spill + 224 | ||
53 | stqa $15, regs_spill + 240 | ||
54 | |||
55 | /* SPU Context Save, Step 8: Save the remaining 112 GPRs. */ | ||
/* $3 walks the spill area starting at the slot for register 16
 * (offset 256 = 16 registers * 16 bytes).
 */
56 | ila $3, regs_spill + 256 | ||
57 | save_regs: | ||
/* $4 receives the quadword holding the four stqd instructions below. */
58 | lqr $4, save_reg_insts | ||
59 | save_reg_loop: | ||
/* NOTE(review): adding 4 to each instruction word presumably bumps
 * the source-register field of all four stqd instructions by 4 for
 * the next pass -- confirm against the SPU instruction encoding.
 */
60 | ai $4, $4, 4 | ||
61 | .balignl 16, 0x40200000 | ||
62 | save_reg_insts: /* must be quad-word aligned. */ | ||
63 | stqd $16, 0($3) | ||
64 | stqd $17, 16($3) | ||
65 | stqd $18, 32($3) | ||
66 | stqd $19, 48($3) | ||
/* $5 = low 7 bits of the updated instruction word; the loop ends
 * when they reach zero.  The updated instructions are written back
 * over save_reg_insts and $3 advances by four quadwords.
 */
67 | andi $5, $4, 0x7F | ||
68 | stqr $4, save_reg_insts | ||
69 | ai $3, $3, 64 | ||
70 | brnz $5, save_reg_loop | ||
71 | |||
72 | /* Initialize the stack pointer to point to 16368 | ||
73 | * (16kb-16). The back chain pointer is initialized | ||
74 | * to NULL. | ||
75 | */ | ||
76 | il $0, 0 | ||
77 | il $SP, 16368 | ||
78 | stqd $0, 0($SP) | ||
79 | |||
80 | /* Allocate a minimum stack frame for the called main. | ||
81 | * This is needed so that main has a place to save the | ||
82 | * link register when it calls another function. | ||
83 | */ | ||
84 | stqd $SP, -160($SP) | ||
85 | ai $SP, $SP, -160 | ||
86 | |||
87 | /* Call the program's main function. */ | ||
88 | brsl $0, main | ||
89 | |||
90 | /* In this case main should not return; if it does | ||
91 | * there has been an error in the sequence. Execute | ||
92 | * stop-and-signal with code=0. | ||
93 | */ | ||
/* Reached by falling through from the call to main above: the normal
 * path ends inside main, at save_complete() in spu_save.c.
 */
94 | .global exit | ||
95 | .global _exit | ||
96 | exit: | ||
97 | _exit: | ||
98 | stop 0x0 | ||
99 | |||
100 | /* Pad the size of this crt0.o to be multiple of 16 bytes. */ | ||
101 | .balignl 16, 0x0 | ||
102 | |||
diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h new file mode 100644 index 000000000000..58359feb6c95 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * spu_utils.h: Utilities for the SPU side of the context switch operation. | ||
3 | * | ||
4 | * (C) Copyright IBM 2005 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
19 | */ | ||
20 | |||
21 | #ifndef _SPU_CONTEXT_UTILS_H_ | ||
22 | #define _SPU_CONTEXT_UTILS_H_ | ||
23 | |||
24 | /* | ||
25 | * 64-bit safe EA: the effective address is held as one value (ull) | ||
26 | * that can also be accessed as two unsigned int words; enqueue_putllc() writes ui[0] to MFC_EAH and ui[1] to MFC_EAL. */ | ||
27 | typedef union { | ||
28 | unsigned long long ull; | ||
29 | unsigned int ui[2]; | ||
30 | } addr64; | ||
31 | |||
32 | /* | ||
33 | * 128-bit register template: four unsigned int slots overlaid on one vector unsigned int. | ||
34 | */ | ||
35 | typedef union { | ||
36 | unsigned int slot[4]; | ||
37 | vector unsigned int v; | ||
38 | } spu_reg128v; | ||
39 | |||
40 | /* | ||
41 | * DMA list structure: one list element, holding a size and the low part of an effective address (ea_low). | ||
42 | */ | ||
43 | struct dma_list_elem { | ||
44 | unsigned int size; | ||
45 | unsigned int ea_low; | ||
46 | }; | ||
47 | |||
48 | /* | ||
49 | * Storage for the 8-byte aligned, 15-element DMA list that build_dma_list() fills in. | ||
50 | */ | ||
51 | struct dma_list_elem dma_list[15] __attribute__ ((aligned(8))); | ||
52 | |||
53 | /* | ||
54 | * External declaration of the register spill area (NR_SPU_SPILL_REGS entries); | ||
55 | * the storage itself lives in crt0, whose startup code stores the GPRs into it. | ||
56 | */ | ||
57 | extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS]; | ||
58 | |||
59 | /* | ||
60 | * Compute LSCSA byte offset for a given field, measured from gprs[0].slot[0]; dummy is a null struct spu_lscsa pointer used only for this address arithmetic. | ||
61 | * LSCSA_QW_OFFSET gives the same offset shifted right by 4, i.e. in units of 16 bytes. */ | ||
62 | static struct spu_lscsa *dummy = (struct spu_lscsa *)0; | ||
63 | #define LSCSA_BYTE_OFFSET(_field) \ | ||
64 | ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0]))) | ||
65 | #define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4) | ||
66 | |||
67 | static inline void set_event_mask(void) | ||
68 | { | ||
69 | unsigned int event_mask = 0; | ||
70 | |||
71 | /* Save, Step 4: | ||
72 | * Restore, Step 1: | ||
73 | * Set the SPU_RdEventMsk channel to zero to mask | ||
74 | * all events. | ||
75 | */ | ||
76 | spu_writech(SPU_WrEventMask, event_mask); | ||
77 | } | ||
78 | |||
79 | static inline void set_tag_mask(void) | ||
80 | { | ||
81 | unsigned int tag_mask = 1; | ||
82 | |||
83 | /* Save, Step 5: | ||
84 | * Restore, Step 2: | ||
85 | * Set the SPU_WrTagMsk channel to '01' to unmask | ||
86 | * only tag group 0. | ||
87 | */ | ||
88 | spu_writech(MFC_WrTagMask, tag_mask); | ||
89 | } | ||
90 | |||
91 | static inline void build_dma_list(addr64 lscsa_ea) | ||
92 | { | ||
93 | unsigned int ea_low; | ||
94 | int i; | ||
95 | |||
96 | /* Save, Step 6: | ||
97 | * Restore, Step 3: | ||
98 | * Update the effective address for the CSA in the | ||
99 | * pre-canned DMA-list in local storage. | ||
100 | */ | ||
101 | ea_low = lscsa_ea.ui[1]; | ||
102 | ea_low += LSCSA_BYTE_OFFSET(ls[16384]); | ||
103 | |||
104 | for (i = 0; i < 15; i++, ea_low += 16384) { | ||
105 | dma_list[i].size = 16384; | ||
106 | dma_list[i].ea_low = ea_low; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | static inline void enqueue_putllc(addr64 lscsa_ea) | ||
111 | { | ||
112 | unsigned int ls = 0; | ||
113 | unsigned int size = 128; | ||
114 | unsigned int tag_id = 0; | ||
115 | unsigned int cmd = 0xB4; /* PUTLLC */ | ||
116 | |||
117 | /* Save, Step 12: | ||
118 | * Restore, Step 7: | ||
119 | * Send a PUTLLC (tag 0) command to the MFC using | ||
120 | * an effective address in the CSA in order to | ||
121 | * remove any possible lock-line reservation. | ||
122 | */ | ||
123 | spu_writech(MFC_LSA, ls); | ||
124 | spu_writech(MFC_EAH, lscsa_ea.ui[0]); | ||
125 | spu_writech(MFC_EAL, lscsa_ea.ui[1]); | ||
126 | spu_writech(MFC_Size, size); | ||
127 | spu_writech(MFC_TagID, tag_id); | ||
128 | spu_writech(MFC_Cmd, cmd); | ||
129 | } | ||
130 | |||
131 | static inline void set_tag_update(void) | ||
132 | { | ||
133 | unsigned int update_any = 1; | ||
134 | |||
135 | /* Save, Step 15: | ||
136 | * Restore, Step 8: | ||
137 | * Write the MFC_TagUpdate channel with '01'. | ||
138 | */ | ||
139 | spu_writech(MFC_WrTagUpdate, update_any); | ||
140 | } | ||
141 | |||
142 | static inline void read_tag_status(void) | ||
143 | { | ||
144 | /* Save, Step 16: | ||
145 | * Restore, Step 9: | ||
146 | * Read the MFC_TagStat channel data. | ||
147 | */ | ||
148 | spu_readch(MFC_RdTagStat); | ||
149 | } | ||
150 | |||
151 | static inline void read_llar_status(void) | ||
152 | { | ||
153 | /* Save, Step 17: | ||
154 | * Restore, Step 10: | ||
155 | * Read the MFC_AtomicStat channel data. | ||
156 | */ | ||
157 | spu_readch(MFC_RdAtomicStat); | ||
158 | } | ||
159 | |||
160 | #endif /* _SPU_CONTEXT_UTILS_H_ */ | ||