diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvc0_grhub.fuc')
-rw-r--r-- | drivers/gpu/drm/nouveau/nvc0_grhub.fuc | 808 |
1 files changed, 808 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc new file mode 100644 index 000000000000..a1a599124cf4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc | |||
@@ -0,0 +1,808 @@ | |||
1 | /* fuc microcode for nvc0 PGRAPH/HUB | ||
2 | * | ||
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
9 | * and/or sell copies of the Software, and to permit persons to whom the | ||
10 | * Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice shall be included in | ||
13 | * all copies or substantial portions of the Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
21 | * OTHER DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: Ben Skeggs | ||
24 | */ | ||
25 | |||
26 | /* To build: | ||
27 | * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h | ||
28 | */ | ||
29 | |||
// Data segment for the HUB microcode.
30 | .section nvc0_grhub_data | ||
31 | include(`nvc0_graph.fuc') | ||
// gpc_count / rop_count are stored by init from a register read;
// hub_mmio_list_head / hub_mmio_list_tail are stored by init from the
// chipsets entry that matched.
32 | gpc_count: .b32 0 | ||
33 | rop_count: .b32 0 | ||
// command queue: filled by ih (queue_put), drained by main (queue_get)
34 | cmd_queue: queue_init | ||
35 | hub_mmio_list_head: .b32 0 | ||
36 | hub_mmio_list_tail: .b32 0 | ||
37 | |||
// written by ctx_load, read back by ctx_xfer and ctx_mmio_exec
38 | ctx_current: .b32 0 | ||
39 | |||
// Chipset lookup table scanned by init_find_chipset.  Each entry is 8 bytes:
//   +0  .b8 x4  chipset id (compared as one 32-bit word against CC_SCRATCH[0])
//   +4  .b16    start of this chipset's mmio list
//   +6  .b16    end of this chipset's mmio list
// An all-zero id terminates the table.
40 | chipsets: | ||
41 | .b8 0xc0 0 0 0 | ||
42 | .b16 nvc0_hub_mmio_head | ||
43 | .b16 nvc0_hub_mmio_tail | ||
// 0xc1 is the only entry ending at nvc1_hub_mmio_tail, so it alone also
// covers the extra 0x4064c0 range listed after nvc0_hub_mmio_tail.
44 | .b8 0xc1 0 0 0 | ||
45 | .b16 nvc0_hub_mmio_head | ||
46 | .b16 nvc1_hub_mmio_tail | ||
47 | .b8 0xc3 0 0 0 | ||
48 | .b16 nvc0_hub_mmio_head | ||
49 | .b16 nvc0_hub_mmio_tail | ||
50 | .b8 0xc4 0 0 0 | ||
51 | .b16 nvc0_hub_mmio_head | ||
52 | .b16 nvc0_hub_mmio_tail | ||
53 | .b8 0xc8 0 0 0 | ||
54 | .b16 nvc0_hub_mmio_head | ||
55 | .b16 nvc0_hub_mmio_tail | ||
56 | .b8 0xce 0 0 0 | ||
57 | .b16 nvc0_hub_mmio_head | ||
58 | .b16 nvc0_hub_mmio_tail | ||
// table terminator
59 | .b8 0 0 0 0 | ||
60 | |||
// mmio list consumed by mmctx_size (init) and mmctx_xfer (ctx_xfer).
// NOTE(review): mmctx_data() comes from nvc0_graph.fuc; the arguments look
// like (first register address, register count) - confirm against the macro.
61 | nvc0_hub_mmio_head: | ||
62 | mmctx_data(0x17e91c, 2) | ||
63 | mmctx_data(0x400204, 2) | ||
64 | mmctx_data(0x404004, 11) | ||
65 | mmctx_data(0x404044, 1) | ||
66 | mmctx_data(0x404094, 14) | ||
67 | mmctx_data(0x4040d0, 7) | ||
68 | mmctx_data(0x4040f8, 1) | ||
69 | mmctx_data(0x404130, 3) | ||
70 | mmctx_data(0x404150, 3) | ||
71 | mmctx_data(0x404164, 2) | ||
72 | mmctx_data(0x404174, 3) | ||
73 | mmctx_data(0x404200, 8) | ||
74 | mmctx_data(0x404404, 14) | ||
75 | mmctx_data(0x404460, 4) | ||
76 | mmctx_data(0x404480, 1) | ||
77 | mmctx_data(0x404498, 1) | ||
78 | mmctx_data(0x404604, 4) | ||
79 | mmctx_data(0x404618, 32) | ||
80 | mmctx_data(0x404698, 21) | ||
81 | mmctx_data(0x4046f0, 2) | ||
82 | mmctx_data(0x404700, 22) | ||
83 | mmctx_data(0x405800, 1) | ||
84 | mmctx_data(0x405830, 3) | ||
85 | mmctx_data(0x405854, 1) | ||
86 | mmctx_data(0x405870, 4) | ||
87 | mmctx_data(0x405a00, 2) | ||
88 | mmctx_data(0x405a18, 1) | ||
89 | mmctx_data(0x406020, 1) | ||
90 | mmctx_data(0x406028, 4) | ||
91 | mmctx_data(0x4064a8, 2) | ||
92 | mmctx_data(0x4064b4, 2) | ||
93 | mmctx_data(0x407804, 1) | ||
94 | mmctx_data(0x40780c, 6) | ||
95 | mmctx_data(0x4078bc, 1) | ||
96 | mmctx_data(0x408000, 7) | ||
97 | mmctx_data(0x408064, 1) | ||
98 | mmctx_data(0x408800, 3) | ||
99 | mmctx_data(0x408900, 4) | ||
100 | mmctx_data(0x408980, 1) | ||
// end of the list for every chipset except 0xc1
101 | nvc0_hub_mmio_tail: | ||
102 | mmctx_data(0x4064c0, 2) | ||
// end of the list for chipset 0xc1
103 | nvc1_hub_mmio_tail: | ||
104 | |||
// chan_data receives the first 256 bytes of the channel's context (ctx_load)
// and is written back by ctx_mmio_exec; the two words below are therefore
// the first two words of that 256-byte header.
105 | .align 256 | ||
106 | chan_data: | ||
107 | chan_mmio_count: .b32 0 | ||
108 | chan_mmio_address: .b32 0 | ||
109 | |||
// scratch buffer for transfers: ctx_load fetches 16 bytes into it,
// ctx_mmio_exec fetches 256 bytes at a time.
// NOTE(review): only one word is declared here; the 256-byte use relies on
// nothing else being placed after it in this section - confirm.
110 | .align 256 | ||
111 | xfer_data: .b32 0 | ||
112 | |||
// Code segment.  The first instruction jumps over the shared helper code
// that the include below pulls in, straight to init.
113 | .section nvc0_grhub_code | ||
114 | bra init | ||
// defining include_code before the second include of nvc0_graph.fuc
// presumably selects its code half (the data section included it without
// the define) - confirm against nvc0_graph.fuc
115 | define(`include_code') | ||
116 | include(`nvc0_graph.fuc') | ||
117 | |||
118 | // reports an exception to the host | ||
119 | // | ||
120 | // In: $r15 error code (see nvc0_graph.fuc) | ||
121 | // | ||
// Stores the error code, then raises an upward interrupt.
// $r14 is preserved; $r15 is overwritten (holds 1 on return).
122 | error: | ||
123 | push $r14 | ||
124 | mov $r14 0x814 | ||
125 | shl b32 $r14 6 | ||
126 | iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code | ||
127 | mov $r14 0xc1c | ||
128 | shl b32 $r14 6 | ||
// the error code has already been written out, so $r15 can be reused
129 | mov $r15 1 | ||
130 | iowr I[$r14 + 0x000] $r15 // INTR_UP_SET | ||
131 | pop $r14 | ||
132 | ret | ||
133 | |||
134 | // HUB fuc initialisation, executed by triggering ucode start, will | ||
135 | // fall through to main loop after completion. | ||
136 | // | ||
137 | // Input: | ||
138 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
139 | // | ||
140 | // Output: | ||
141 | // CC_SCRATCH[0]: | ||
142 | // 31:31: set to signal completion | ||
143 | // CC_SCRATCH[1]: | ||
144 | // 31:0: total PGRAPH context size | ||
145 | // | ||
// Entry point (reached via the "bra init" at the start of the code section).
// There is no ret at the end: on success execution falls through into main.
146 | init: | ||
// $r0 is zeroed here and used as a constant zero by the rest of the file
147 | clear b32 $r0 | ||
148 | mov $sp $r0 | ||
149 | mov $xdbase $r0 | ||
150 | |||
151 | // enable fifo access | ||
152 | mov $r1 0x1200 | ||
153 | mov $r2 2 | ||
154 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
155 | |||
156 | // setup i0 handler, and route all interrupts to it | ||
157 | mov $r1 ih | ||
158 | mov $iv0 $r1 | ||
// $r1 = 0x400 is kept until the INTR_EN_SET write further down
159 | mov $r1 0x400 | ||
160 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
161 | |||
162 | // route HUB_CHANNEL_SWITCH to fuc interrupt 8 | ||
163 | mov $r3 0x404 | ||
164 | shl b32 $r3 6 | ||
165 | mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 | ||
166 | iowr I[$r3 + 0x000] $r2 | ||
167 | |||
168 | // not sure what these are, route them because NVIDIA does, and | ||
169 | // the IRQ handler will signal the host if we ever get one.. we | ||
170 | // may find out if/why we need to handle these if so.. | ||
171 | // | ||
172 | mov $r2 0x2004 | ||
173 | iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 | ||
174 | mov $r2 0x200b | ||
175 | iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 | ||
176 | mov $r2 0x200c | ||
177 | iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 | ||
178 | |||
179 | // enable all INTR_UP interrupts | ||
180 | mov $r2 0xc24 | ||
181 | shl b32 $r2 6 | ||
// $r3 = ~0 (all bits set)
182 | not b32 $r3 $r0 | ||
183 | iowr I[$r2] $r3 | ||
184 | |||
185 | // enable fifo, ctxsw, 9, 10, 15 interrupts | ||
// 0x8704 = bits 2, 8, 9, 10 and 15; it is loaded as a negative immediate
// and the upper half is then cleared with sethi
186 | mov $r2 -0x78fc // 0x8704 | ||
187 | sethi $r2 0 | ||
188 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
189 | |||
190 | // fifo level triggered, rest edge | ||
// $r1 becomes 0x300; the value 4 is bit 2, the fifo interrupt bit tested in ih
191 | sub b32 $r1 0x100 | ||
192 | mov $r2 4 | ||
193 | iowr I[$r1] $r2 | ||
194 | |||
195 | // enable interrupts | ||
196 | bset $flags ie0 | ||
197 | |||
198 | // fetch enabled GPC/ROP counts | ||
199 | mov $r14 -0x69fc // 0x409604 | ||
200 | sethi $r14 0x400000 | ||
201 | call nv_rd32 | ||
// ROP count is taken from bits 16..20 of the value, GPC count from bits 0..4
202 | extr $r1 $r15 16:20 | ||
203 | st b32 D[$r0 + rop_count] $r1 | ||
204 | and $r15 0x1f | ||
205 | st b32 D[$r0 + gpc_count] $r15 | ||
206 | |||
207 | // set BAR_REQMASK to GPC mask | ||
// $r1 = (1 << gpc_count) - 1
208 | mov $r1 1 | ||
209 | shl b32 $r1 $r15 | ||
210 | sub b32 $r1 1 | ||
211 | mov $r2 0x40c | ||
212 | shl b32 $r2 6 | ||
213 | iowr I[$r2 + 0x000] $r1 | ||
214 | iowr I[$r2 + 0x100] $r1 | ||
215 | |||
216 | // find context data for this chipset | ||
217 | mov $r2 0x800 | ||
218 | shl b32 $r2 6 | ||
219 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
// $r15 walks the chipsets table in 8-byte steps; it starts one entry early
// because the loop increments before it loads
220 | mov $r15 chipsets - 8 | ||
221 | init_find_chipset: | ||
222 | add b32 $r15 8 | ||
223 | ld b32 $r3 D[$r15 + 0x00] | ||
224 | cmpu b32 $r3 $r2 | ||
225 | bra e init_context | ||
// an id of 0 is the table terminator
226 | cmpu b32 $r3 0 | ||
227 | bra ne init_find_chipset | ||
228 | // unknown chipset | ||
// NOTE(review): init is entered by a branch and set $sp to 0 above, so
// nothing was pushed for this ret to return to - confirm what is intended
229 | ret | ||
230 | |||
231 | // context size calculation, reserve first 256 bytes for use by fuc | ||
// $r1 is the running context size from here to the end of init
232 | init_context: | ||
233 | mov $r1 256 | ||
234 | |||
235 | // calculate size of mmio context data | ||
// +4 / +6 are the list start / end fields of the matched chipsets entry
236 | ld b16 $r14 D[$r15 + 4] | ||
237 | ld b16 $r15 D[$r15 + 6] | ||
238 | sethi $r14 0 | ||
239 | st b32 D[$r0 + hub_mmio_list_head] $r14 | ||
240 | st b32 D[$r0 + hub_mmio_list_tail] $r15 | ||
// presumably returns the byte size in $r15 (it is added to $r1 below) -
// confirm against mmctx_size in nvc0_graph.fuc
241 | call mmctx_size | ||
242 | |||
243 | // set mmctx base addresses now so we don't have to do it later, | ||
244 | // they don't (currently) ever change | ||
245 | mov $r3 0x700 | ||
246 | shl b32 $r3 6 | ||
// the base registers are written with the offset divided by 256
247 | shr b32 $r4 $r1 8 | ||
248 | iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE | ||
249 | iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE | ||
250 | add b32 $r3 0x1300 | ||
251 | add b32 $r1 $r15 | ||
// the count register is written with the size divided by 4
252 | shr b32 $r15 2 | ||
253 | iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!? | ||
254 | |||
255 | // strands, base offset needs to be aligned to 256 bytes | ||
// this always advances to the next 256-byte boundary, even when $r1 is
// already aligned
256 | shr b32 $r1 8 | ||
257 | add b32 $r1 1 | ||
258 | shl b32 $r1 8 | ||
259 | mov b32 $r15 $r1 | ||
// presumably returns the strand context size in $r15 - confirm against
// strand_ctx_init in nvc0_graph.fuc
260 | call strand_ctx_init | ||
261 | add b32 $r1 $r15 | ||
262 | |||
263 | // initialise each GPC in sequence by passing in the offset of its | ||
264 | // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which | ||
265 | // has previously been uploaded by the host) running. | ||
266 | // | ||
267 | // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 | ||
268 | // when it has completed, and return the size of its context data | ||
269 | // in GPCn_CC_SCRATCH[1] | ||
270 | // | ||
// $r3 = GPCs left to start, $r4 = register base of the current GPC
// (0x502000, advanced by 0x8000 per GPC).  The count is tested only at the
// bottom of the loop, so this assumes gpc_count is non-zero - TODO confirm.
271 | ld b32 $r3 D[$r0 + gpc_count] | ||
272 | mov $r4 0x2000 | ||
273 | sethi $r4 0x500000 | ||
274 | init_gpc: | ||
275 | // setup, and start GPC ucode running | ||
276 | add b32 $r14 $r4 0x804 | ||
277 | mov b32 $r15 $r1 | ||
278 | call nv_wr32 // CC_SCRATCH[1] = ctx offset | ||
279 | add b32 $r14 $r4 0x800 | ||
// NOTE(review): relies on $r2 still holding the chipset id read before
// init_find_chipset, i.e. on mmctx_size / strand_ctx_init / nv_wr32 /
// nv_rd32 not clobbering it - confirm
280 | mov b32 $r15 $r2 | ||
281 | call nv_wr32 // CC_SCRATCH[0] = chipset | ||
282 | add b32 $r14 $r4 0x10c | ||
283 | clear b32 $r15 | ||
284 | call nv_wr32 | ||
// the same zero value is written here; assumes nv_wr32 leaves $r15 intact
285 | add b32 $r14 $r4 0x104 | ||
286 | call nv_wr32 // ENTRY | ||
287 | add b32 $r14 $r4 0x100 | ||
288 | mov $r15 2 // CTRL_START_TRIGGER | ||
289 | call nv_wr32 // CTRL | ||
290 | |||
291 | // wait for it to complete, and adjust context size | ||
// polls with no timeout until bit 31 of the GPC's CC_SCRATCH[0] is set
292 | add b32 $r14 $r4 0x800 | ||
293 | init_gpc_wait: | ||
294 | call nv_rd32 | ||
295 | xbit $r15 $r15 31 | ||
296 | bra e init_gpc_wait | ||
297 | add b32 $r14 $r4 0x804 | ||
298 | call nv_rd32 | ||
299 | add b32 $r1 $r15 | ||
300 | |||
301 | // next! | ||
302 | add b32 $r4 0x8000 | ||
303 | sub b32 $r3 1 | ||
304 | bra ne init_gpc | ||
305 | |||
306 | // save context size, and tell host we're ready | ||
307 | mov $r2 0x800 | ||
308 | shl b32 $r2 6 | ||
309 | iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size | ||
// $r2 now equals 0x820 << 6, the same register main_done writes
310 | add b32 $r2 0x800 | ||
311 | clear b32 $r1 | ||
312 | bset $r1 31 | ||
313 | iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 | ||
314 | |||
315 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
316 | // handler, pulls a command from the queue and executes its handler | ||
317 | // | ||
// Command dispatch loop.  Commands arrive through cmd_queue, where ih puts
// them: $r14 carries the command and $r15 its data.  0x4001 is queued by ih
// itself for a context switch request; other values come from the fifo.
318 | main: | ||
319 | // sleep until we have something to do | ||
// ih clears $p0 just before its iret, which ends the sleep
320 | bset $flags $p0 | ||
321 | sleep $p0 | ||
322 | mov $r13 cmd_queue | ||
323 | call queue_get | ||
// presumably $p1 set means the queue was empty - confirm against queue_get
324 | bra $p1 main | ||
325 | |||
326 | // context switch, requested by GPU? | ||
327 | cmpu b32 $r14 0x4001 | ||
328 | bra ne main_not_ctx_switch | ||
329 | trace_set(T_AUTO) | ||
330 | mov $r1 0xb00 | ||
331 | shl b32 $r1 6 | ||
332 | iord $r2 I[$r1 + 0x100] // CHAN_NEXT | ||
333 | iord $r1 I[$r1 + 0x000] // CHAN_CUR | ||
334 | |||
// bit 31 of CHAN_CUR / CHAN_NEXT decides which of the three cases below
// runs; bit 31 clear means "no previous" / "no next" respectively
335 | xbit $r3 $r1 31 | ||
336 | bra e chsw_no_prev | ||
337 | xbit $r3 $r2 31 | ||
338 | bra e chsw_prev_no_next | ||
// both present: save the current channel ($p1 clear, $p2 set), then load
// the next one ($p1 set, $p2 still set)
339 | push $r2 | ||
340 | mov b32 $r2 $r1 | ||
341 | trace_set(T_SAVE) | ||
342 | bclr $flags $p1 | ||
343 | bset $flags $p2 | ||
344 | call ctx_xfer | ||
345 | trace_clr(T_SAVE); | ||
346 | pop $r2 | ||
347 | trace_set(T_LOAD); | ||
348 | bset $flags $p1 | ||
349 | call ctx_xfer | ||
350 | trace_clr(T_LOAD); | ||
351 | bra chsw_done | ||
// previous only: plain save, then write the CHAN_NEXT value to CHAN_CUR
352 | chsw_prev_no_next: | ||
353 | push $r2 | ||
354 | mov b32 $r2 $r1 | ||
355 | bclr $flags $p1 | ||
356 | bclr $flags $p2 | ||
357 | call ctx_xfer | ||
358 | pop $r2 | ||
359 | mov $r1 0xb00 | ||
360 | shl b32 $r1 6 | ||
361 | iowr I[$r1] $r2 | ||
362 | bra chsw_done | ||
// next only: plain load; if there is no next either, just ack the request
363 | chsw_no_prev: | ||
364 | xbit $r3 $r2 31 | ||
365 | bra e chsw_done | ||
366 | bset $flags $p1 | ||
367 | bclr $flags $p2 | ||
368 | call ctx_xfer | ||
369 | |||
370 | // ack the context switch request | ||
371 | chsw_done: | ||
372 | mov $r1 0xb0c | ||
373 | shl b32 $r1 6 | ||
374 | mov $r2 1 | ||
375 | iowr I[$r1 + 0x000] $r2 // 0x409b0c | ||
376 | trace_clr(T_AUTO) | ||
// returns straight to main: unlike the fifo commands below, this path does
// not go through main_done
377 | bra main | ||
378 | |||
379 | // request to set current channel? (*not* a context switch) | ||
380 | main_not_ctx_switch: | ||
381 | cmpu b32 $r14 0x0001 | ||
382 | bra ne main_not_ctx_chan | ||
// the command's data word is the channel address that ctx_chan expects in $r2
383 | mov b32 $r2 $r15 | ||
384 | call ctx_chan | ||
385 | bra main_done | ||
386 | |||
387 | // request to store current channel context? | ||
388 | main_not_ctx_chan: | ||
389 | cmpu b32 $r14 0x0002 | ||
390 | bra ne main_not_ctx_save | ||
391 | trace_set(T_SAVE) | ||
// plain save ($p1 clear, $p2 clear); $r2 is not set up here, and the save
// path of ctx_xfer takes the context pointer from ctx_current instead
392 | bclr $flags $p1 | ||
393 | bclr $flags $p2 | ||
394 | call ctx_xfer | ||
395 | trace_clr(T_SAVE) | ||
396 | bra main_done | ||
397 | |||
// unknown command: report (command << 16) | E_BAD_COMMAND through error.
// main_done is skipped, so no completion bit is written for it.
398 | main_not_ctx_save: | ||
399 | shl b32 $r15 $r14 16 | ||
400 | or $r15 E_BAD_COMMAND | ||
401 | call error | ||
402 | bra main | ||
403 | |||
// signal completion of a fifo command to the host
404 | main_done: | ||
405 | mov $r1 0x820 | ||
406 | shl b32 $r1 6 | ||
407 | clear b32 $r2 | ||
408 | bset $r2 31 | ||
409 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
410 | bra main | ||
411 | |||
412 | // interrupt handler | ||
// Interrupt handler (installed in $iv0 by init).
//
// Queues incoming fifo commands and context switch requests on cmd_queue for
// main to process, forwards any other pending interrupt to the host through
// INTR_UP_SET, acks everything that was pending and wakes main.
//
// All registers used here, and $flags, are saved on entry and restored on
// exit.  $r10 holds the pending interrupt bits from the INTR read until the
// final INTR_ACK write and must not be reused as scratch in between.
413 | ih: | ||
414 | push $r8 | ||
415 | mov $r8 $flags | ||
416 | push $r8 | ||
417 | push $r9 | ||
418 | push $r10 | ||
419 | push $r11 | ||
420 | push $r13 | ||
421 | push $r14 | ||
422 | push $r15 | ||
423 | |||
424 | // incoming fifo command? | ||
425 | iord $r10 I[$r0 + 0x200] // INTR | ||
426 | and $r11 $r10 0x00000004 | ||
427 | bra e ih_no_fifo | ||
428 | // queue incoming fifo command for later processing | ||
429 | mov $r11 0x1900 | ||
430 | mov $r13 cmd_queue | ||
431 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
432 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
433 | call queue_put | ||
434 | add b32 $r11 0x400 | ||
435 | mov $r14 1 | ||
436 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
437 | |||
438 | // context switch request? | ||
439 | ih_no_fifo: | ||
440 | and $r11 $r10 0x00000100 | ||
441 | bra e ih_no_ctxsw | ||
442 | // enqueue a context switch for later processing | ||
// only the command word matters for 0x4001; $r15 is queued as-is
443 | mov $r13 cmd_queue | ||
444 | mov $r14 0x4001 | ||
445 | call queue_put | ||
446 | |||
447 | // anything we didn't handle, bring it to the host's attention | ||
// 0x104 = the fifo (bit 2) and ctxsw (bit 8) bits handled above
448 | ih_no_ctxsw: | ||
449 | mov $r11 0x104 | ||
450 | not b32 $r11 | ||
451 | and $r11 $r10 $r11 | ||
452 | bra e ih_no_other | ||
// use $r9 (saved/restored by this handler) for the register address, so
// that $r10 still holds the pending bits for the INTR_ACK write below
453 | mov $r9 0xc1c | ||
454 | shl b32 $r9 6 | ||
455 | iowr I[$r9] $r11 // INTR_UP_SET | ||
456 | |||
457 | // ack, and wake up main() | ||
458 | ih_no_other: | ||
459 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
460 | |||
461 | pop $r15 | ||
462 | pop $r14 | ||
463 | pop $r13 | ||
464 | pop $r11 | ||
465 | pop $r10 | ||
466 | pop $r9 | ||
467 | pop $r8 | ||
468 | mov $flags $r8 | ||
469 | pop $r8 | ||
// cleared after $flags has been restored so the restore cannot undo it;
// main sleeps on $p0
470 | bclr $flags $p0 | ||
471 | iret | ||
472 | |||
473 | // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done | ||
// Writes 1 to 0x404160, then polls the same register until bit 4 reads back
// set.  There is no timeout.  Clobbers $r14 and $r15.
474 | ctx_4160s: | ||
475 | mov $r14 0x4160 | ||
476 | sethi $r14 0x400000 | ||
477 | mov $r15 1 | ||
478 | call nv_wr32 | ||
479 | ctx_4160s_wait: | ||
// re-reads through $r14; assumes nv_wr32 / nv_rd32 leave $r14 unchanged
480 | call nv_rd32 | ||
481 | xbit $r15 $r15 4 | ||
482 | bra e ctx_4160s_wait | ||
483 | ret | ||
484 | |||
485 | // Without clearing again at end of xfer, some things cause PGRAPH | ||
486 | // to hang with STATUS=0x00000007 until it's cleared.. fbcon can | ||
487 | // still function with it set however... | ||
// Writes 0 to 0x404160 (the register ctx_4160s sets to 1).  Unlike
// ctx_4160s it does not wait afterwards.  Clobbers $r14 and $r15.
488 | ctx_4160c: | ||
489 | mov $r14 0x4160 | ||
490 | sethi $r14 0x400000 | ||
491 | clear b32 $r15 | ||
492 | call nv_wr32 | ||
493 | ret | ||
494 | |||
495 | // Again, not real sure | ||
496 | // | ||
497 | // In: $r15 value to set 0x404170 to | ||
498 | // | ||
// Writes ($r15 | 0x10) to 0x404170.  Bit 4 (0x10) is the bit ctx_4170w
// polls until it reads back clear.  Clobbers $r14 and $r15.
499 | ctx_4170s: | ||
500 | mov $r14 0x4170 | ||
501 | sethi $r14 0x400000 | ||
502 | or $r15 0x10 | ||
503 | call nv_wr32 | ||
504 | ret | ||
505 | |||
506 | // Waits for a ctx_4170s() call to complete | ||
507 | // | ||
// Polls 0x404170 until bit 4 (0x10) reads back clear.  There is no timeout.
// The loop branches back to the routine's own label, so the address in $r14
// is rebuilt on every iteration.  Clobbers $r14 and $r15.
508 | ctx_4170w: | ||
509 | mov $r14 0x4170 | ||
510 | sethi $r14 0x400000 | ||
511 | call nv_rd32 | ||
512 | and $r15 0x10 | ||
513 | bra ne ctx_4170w | ||
514 | ret | ||
515 | |||
516 | // Disables various things, waits a bit, and re-enables them.. | ||
517 | // | ||
518 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
519 | // good description for the bits we turn off? Anyways, without this, | ||
520 | // funny things happen. | ||
521 | // | ||
// Writes 0x270 to HUB_RED_SWITCH, spins for 8 loop iterations, then writes
// 0x770.  Clobbers $r14 and $r15.
522 | ctx_redswitch: | ||
523 | mov $r14 0x614 | ||
524 | shl b32 $r14 6 | ||
525 | mov $r15 0x270 | ||
526 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL | ||
// short busy-wait: counts $r15 down from 8 to 0
527 | mov $r15 8 | ||
528 | ctx_redswitch_delay: | ||
529 | sub b32 $r15 1 | ||
530 | bra ne ctx_redswitch_delay | ||
531 | mov $r15 0x770 | ||
532 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL | ||
533 | ret | ||
534 | |||
535 | // Not a clue what this is for, except that unless the value is 0x10, the | ||
536 | // strand context is saved (and presumably restored) incorrectly.. | ||
537 | // | ||
538 | // In: $r15 value to set to (0x00/0x10 are used) | ||
539 | // | ||
// Writes the value in $r15 to three places: the HUB register at I/O address
// 0x86c << 6, then 0x408a14 and 0x41a86c through nv_wr32.  Clobbers $r14.
// The second nv_wr32 reuses $r15, so this assumes nv_wr32 leaves $r15
// intact - confirm against nvc0_graph.fuc.
540 | ctx_86c: | ||
541 | mov $r14 0x86c | ||
542 | shl b32 $r14 6 | ||
543 | iowr I[$r14] $r15 // HUB(0x86c) = val | ||
// low half given as a negative immediate: -0x75ec is 0x8a14 in 16 bits
544 | mov $r14 -0x75ec | ||
545 | sethi $r14 0x400000 | ||
546 | call nv_wr32 // ROP(0xa14) = val | ||
// -0x5794 is 0xa86c in 16 bits
547 | mov $r14 -0x5794 | ||
548 | sethi $r14 0x410000 | ||
549 | call nv_wr32 // GPC(0x86c) = val | ||
550 | ret | ||
551 | |||
552 | // ctx_load - loads a channel's ctxctl data, and selects its vm | ||
553 | // | ||
554 | // In: $r2 channel address | ||
555 | // | ||
// Steps: switch the hardware to the channel in $r2, fetch 16 bytes of the
// channel header into xfer_data, derive the context pointer from them and
// store it in ctx_current, then fetch the first 256 bytes of the context
// into chan_data.  Clobbers $r1-$r4 and $r10, plus whatever wait_donez uses.
556 | ctx_load: | ||
557 | trace_set(T_CHAN) | ||
558 | |||
559 | // switch to channel, somewhat magic in parts.. | ||
560 | mov $r10 12 // DONE_UNK12 | ||
561 | call wait_donez | ||
562 | mov $r1 0xa24 | ||
563 | shl b32 $r1 6 | ||
564 | iowr I[$r1 + 0x000] $r0 // 0x409a24 | ||
565 | mov $r3 0xb00 | ||
566 | shl b32 $r3 6 | ||
567 | iowr I[$r3 + 0x100] $r2 // CHAN_NEXT | ||
568 | mov $r1 0xa0c | ||
569 | shl b32 $r1 6 | ||
570 | mov $r4 7 | ||
571 | iowr I[$r1 + 0x000] $r2 // MEM_CHAN | ||
572 | iowr I[$r1 + 0x100] $r4 // MEM_CMD | ||
// poll MEM_CMD until its low 5 bits read back as zero; no timeout
573 | ctx_chan_wait_0: | ||
574 | iord $r4 I[$r1 + 0x100] | ||
575 | and $r4 0x1f | ||
576 | bra ne ctx_chan_wait_0 | ||
577 | iowr I[$r3 + 0x000] $r2 // CHAN_CUR | ||
578 | |||
579 | // load channel header, fetch PGRAPH context pointer | ||
580 | mov $xtargets $r0 | ||
// MEM_BASE value = ((channel address with bit 31 cleared) << 4) + 2.
// NOTE(review): together with the 0x10 transfer offset below this is
// annotated "chan + 0x0210", which suggests MEM_BASE is in 256-byte units -
// confirm.
581 | bclr $r2 31 | ||
582 | shl b32 $r2 4 | ||
583 | add b32 $r2 2 | ||
584 | |||
585 | trace_set(T_LCHAN) | ||
586 | mov $r1 0xa04 | ||
587 | shl b32 $r1 6 | ||
588 | iowr I[$r1 + 0x000] $r2 // MEM_BASE | ||
589 | mov $r1 0xa20 | ||
590 | shl b32 $r1 6 | ||
591 | mov $r2 0x0002 | ||
592 | sethi $r2 0x80000000 | ||
593 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram | ||
594 | mov $r1 0x10 // chan + 0x0210 | ||
// $r2 = destination (xfer_data) in the low half, size code in the high half
595 | mov $r2 xfer_data | ||
596 | sethi $r2 0x00020000 // 16 bytes | ||
597 | xdld $r1 $r2 | ||
598 | xdwait | ||
599 | trace_clr(T_LCHAN) | ||
600 | |||
601 | // update current context | ||
// ctx_current = (xfer_data[1] << 24) | (xfer_data[0] >> 8)
602 | ld b32 $r1 D[$r0 + xfer_data + 4] | ||
603 | shl b32 $r1 24 | ||
604 | ld b32 $r2 D[$r0 + xfer_data + 0] | ||
605 | shr b32 $r2 8 | ||
606 | or $r1 $r2 | ||
607 | st b32 D[$r0 + ctx_current] $r1 | ||
608 | |||
609 | // set transfer base to start of context, and fetch context header | ||
610 | trace_set(T_LCTXH) | ||
611 | mov $r2 0xa04 | ||
612 | shl b32 $r2 6 | ||
613 | iowr I[$r2 + 0x000] $r1 // MEM_BASE | ||
614 | mov $r2 1 | ||
615 | mov $r1 0xa20 | ||
616 | shl b32 $r1 6 | ||
617 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm | ||
// offset 0 ($r0) relative to the new base, into chan_data; this fills
// chan_mmio_count and chan_mmio_address
618 | mov $r1 chan_data | ||
619 | sethi $r1 0x00060000 // 256 bytes | ||
620 | xdld $r0 $r1 | ||
621 | xdwait | ||
622 | trace_clr(T_LCTXH) | ||
623 | |||
624 | trace_clr(T_CHAN) | ||
625 | ret | ||
626 | |||
627 | // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as | ||
628 | // the active channel for ctxctl, but not actually transfer | ||
629 | // any context data. intended for use only during initial | ||
630 | // context construction. | ||
631 | // | ||
632 | // In: $r2 channel address | ||
633 | // | ||
// Brackets the channel load with ctx_4160s / ctx_4160c, and issues MEM_CMD 5
// after the load, waiting for it to read back as zero.
// NOTE(review): $r2 must survive ctx_4160s (which calls nv_wr32 / nv_rd32)
// to reach ctx_load intact - confirm those helpers do not clobber it.
634 | ctx_chan: | ||
635 | call ctx_4160s | ||
636 | call ctx_load | ||
637 | mov $r10 12 // DONE_UNK12 | ||
638 | call wait_donez | ||
639 | mov $r1 0xa10 | ||
640 | shl b32 $r1 6 | ||
641 | mov $r2 5 | ||
642 | iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) | ||
// poll until the register reads back as zero; the "or" only sets the flags
// for the branch.  No timeout.
643 | ctx_chan_wait: | ||
644 | iord $r2 I[$r1 + 0x000] | ||
645 | or $r2 $r2 | ||
646 | bra ne ctx_chan_wait | ||
647 | call ctx_4160c | ||
648 | ret | ||
649 | |||
650 | // Execute per-context state overrides list | ||
651 | // | ||
652 | // Only executed on the first load of a channel. Might want to look into | ||
653 | // removing this and having the host directly modify the channel's context | ||
654 | // to change this state... The nouveau DRM already builds this list as | ||
655 | // it's definitely needed for NVIDIA's, so we may as well use it for now | ||
656 | // | ||
657 | // Input: $r1 mmio list length | ||
658 | // | ||
// Walks a list of 8-byte entries; each entry's first word goes to nv_wr32 in
// $r14 and its second in $r15 (presumably register address and value).  The
// list is pulled in 256 bytes (32 entries) at a time through xfer_data.
//
// $r1 counts entries remaining and is tested only after an entry has been
// executed, so it must be non-zero on entry; the caller in ctx_xfer checks
// chan_mmio_count first.  $r3 is the byte offset into the list and $r4 the
// offset within the current 256-byte window.
// Clobbers $r1-$r5, $r14, $r15.
659 | ctx_mmio_exec: | ||
660 | // set transfer base to be the mmio list | ||
661 | ld b32 $r3 D[$r0 + chan_mmio_address] | ||
662 | mov $r2 0xa04 | ||
663 | shl b32 $r2 6 | ||
664 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
665 | |||
666 | clear b32 $r3 | ||
667 | ctx_mmio_loop: | ||
668 | // fetch next 256 bytes of mmio list if necessary | ||
// $r4 == 0 means $r3 sits on a 256-byte boundary: time to refill
669 | and $r4 $r3 0xff | ||
670 | bra ne ctx_mmio_pull | ||
671 | mov $r5 xfer_data | ||
672 | sethi $r5 0x00060000 // 256 bytes | ||
673 | xdld $r3 $r5 | ||
674 | xdwait | ||
675 | |||
676 | // execute a single list entry | ||
677 | ctx_mmio_pull: | ||
678 | ld b32 $r14 D[$r4 + xfer_data + 0x00] | ||
679 | ld b32 $r15 D[$r4 + xfer_data + 0x04] | ||
680 | call nv_wr32 | ||
681 | |||
682 | // next! | ||
683 | add b32 $r3 8 | ||
684 | sub b32 $r1 1 | ||
685 | bra ne ctx_mmio_loop | ||
686 | |||
687 | // set transfer base back to the current context | ||
// $r2 still holds the MEM_BASE register address set at the top; assumes
// nv_wr32 does not clobber it
688 | ctx_mmio_done: | ||
689 | ld b32 $r3 D[$r0 + ctx_current] | ||
690 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
691 | |||
692 | // disable the mmio list now, we don't need/want to execute it again | ||
// zero the count in the local copy, then write the whole 256-byte
// chan_data block back out at offset 0 of the context
693 | st b32 D[$r0 + chan_mmio_count] $r0 | ||
694 | mov $r1 chan_data | ||
695 | sethi $r1 0x00060000 // 256 bytes | ||
696 | xdst $r0 $r1 | ||
697 | xdwait | ||
698 | ret | ||
699 | |||
700 | // Transfer HUB context data between GPU and storage area | ||
701 | // | ||
702 | // In: $r2 channel address | ||
703 | // $p1 clear on save, set on load | ||
704 | // $p2 set if opposite direction done/will be done, so: | ||
705 | // on save it means: "a load will follow this save" | ||
706 | // on load it means: "a save preceded this load" | ||
707 | // | ||
// Flow by predicate state:
//   save ($p1 clear):        pre -> exec -> MEM_CMD 5 -> post (skipped when
//                            $p2 is set, i.e. a load follows)
//   load, $p2 clear:         pre -> pre_load -> exec -> post
//   load, $p2 set:           pre_load -> exec -> post (pre is skipped, it
//                            was already done by the preceding save)
// $r2 is only consumed on the load paths, where it is passed to ctx_load.
708 | ctx_xfer: | ||
709 | bra not $p1 ctx_xfer_pre | ||
710 | bra $p2 ctx_xfer_pre_load | ||
711 | ctx_xfer_pre: | ||
712 | mov $r15 0x10 | ||
713 | call ctx_86c | ||
714 | call ctx_4160s | ||
715 | bra not $p1 ctx_xfer_exec | ||
716 | |||
// load only: 0x404170 handshake around a red switch toggle, then switch to
// the new channel
717 | ctx_xfer_pre_load: | ||
718 | mov $r15 2 | ||
719 | call ctx_4170s | ||
720 | call ctx_4170w | ||
721 | call ctx_redswitch | ||
722 | clear b32 $r15 | ||
723 | call ctx_4170s | ||
724 | call ctx_load | ||
725 | |||
726 | // fetch context pointer, and initiate xfer on all GPCs | ||
727 | ctx_xfer_exec: | ||
728 | ld b32 $r1 D[$r0 + ctx_current] | ||
729 | mov $r2 0x414 | ||
730 | shl b32 $r2 6 | ||
731 | iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset | ||
732 | mov $r14 -0x5b00 | ||
733 | sethi $r14 0x410000 | ||
734 | mov b32 $r15 $r1 | ||
735 | call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer | ||
// command register is the next word; assumes nv_wr32 preserved $r14
736 | add b32 $r14 4 | ||
// command value = $p1 | ($p2 << 1)
737 | xbit $r15 $flags $p1 | ||
738 | xbit $r2 $flags $p2 | ||
739 | shl b32 $r2 1 | ||
740 | or $r15 $r2 | ||
741 | call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) | ||
742 | |||
743 | // strands | ||
744 | mov $r1 0x4afc | ||
745 | sethi $r1 0x20000 | ||
746 | mov $r2 0xc | ||
747 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
748 | call strand_wait | ||
749 | mov $r2 0x47fc | ||
750 | sethi $r2 0x20000 | ||
751 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
// 3 + $p1: 3 on save, 4 on load.  $r1 must still hold the STRAND_CMD
// address after strand_wait.
752 | xbit $r2 $flags $p1 | ||
753 | add b32 $r2 3 | ||
754 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
755 | |||
756 | // mmio context | ||
// arguments for mmctx_xfer (defined in nvc0_graph.fuc): $r10 flags,
// $r11 base, $r12/$r13 list start/end, $r14 multi
757 | xbit $r10 $flags $p1 // direction | ||
758 | or $r10 6 // first, last | ||
759 | mov $r11 0 // base = 0 | ||
760 | ld b32 $r12 D[$r0 + hub_mmio_list_head] | ||
761 | ld b32 $r13 D[$r0 + hub_mmio_list_tail] | ||
762 | mov $r14 0 // not multi | ||
763 | call mmctx_xfer | ||
764 | |||
765 | // wait for GPCs to all complete | ||
766 | mov $r10 8 // DONE_BAR | ||
767 | call wait_doneo | ||
768 | |||
769 | // wait for strand xfer to complete | ||
770 | call strand_wait | ||
771 | |||
772 | // post-op | ||
// loads go straight to ctx_xfer_post; saves first issue MEM_CMD 5 and wait
// for it to read back as zero (no timeout)
773 | bra $p1 ctx_xfer_post | ||
774 | mov $r10 12 // DONE_UNK12 | ||
775 | call wait_donez | ||
776 | mov $r1 0xa10 | ||
777 | shl b32 $r1 6 | ||
778 | mov $r2 5 | ||
779 | iowr I[$r1] $r2 // MEM_CMD | ||
780 | ctx_xfer_post_save_wait: | ||
781 | iord $r2 I[$r1] | ||
782 | or $r2 $r2 | ||
783 | bra ne ctx_xfer_post_save_wait | ||
784 | |||
// save with a load to follow: skip the post sequence (including
// ctx_4160c); the following load runs it
785 | bra $p2 ctx_xfer_done | ||
786 | ctx_xfer_post: | ||
787 | mov $r15 2 | ||
788 | call ctx_4170s | ||
789 | clear b32 $r15 | ||
790 | call ctx_86c | ||
791 | call strand_post | ||
792 | call ctx_4170w | ||
793 | clear b32 $r15 | ||
794 | call ctx_4170s | ||
795 | |||
// the per-context mmio list runs only on load, and only while the count in
// the context header is non-zero (ctx_mmio_exec zeroes it after running)
796 | bra not $p1 ctx_xfer_no_post_mmio | ||
797 | ld b32 $r1 D[$r0 + chan_mmio_count] | ||
798 | or $r1 $r1 | ||
799 | bra e ctx_xfer_no_post_mmio | ||
800 | call ctx_mmio_exec | ||
801 | |||
802 | ctx_xfer_no_post_mmio: | ||
803 | call ctx_4160c | ||
804 | |||
805 | ctx_xfer_done: | ||
806 | ret | ||
807 | |||
808 | .align 256 | ||