aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvc0_grhub.fuc')
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_grhub.fuc808
1 files changed, 808 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
new file mode 100644
index 000000000000..a1a599124cf4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
@@ -0,0 +1,808 @@
1/* fuc microcode for nvc0 PGRAPH/HUB
2 *
3 * Copyright 2011 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: Ben Skeggs
24 */
25
26/* To build:
27 * m4 nvc0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grhub.fuc.h
28 */
29
30.section nvc0_grhub_data
31include(`nvc0_graph.fuc')
32gpc_count: .b32 0 // written by init from bits 4:0 of the 0x409604 read
33rop_count: .b32 0 // written by init from bits 20:16 of the same read
34cmd_queue: queue_init
35hub_mmio_list_head: .b32 0 // head of the matched chipset's mmio list, set by init
36hub_mmio_list_tail: .b32 0 // tail of the matched chipset's mmio list, set by init
37
38ctx_current: .b32 0 // context address computed by ctx_load, used as MEM_BASE / GPC ctx pointer
39
40chipsets: // 8-byte entries: b32 chipset id, b16 list head, b16 list tail; an id of 0 ends the table
41.b8 0xc0 0 0 0
42.b16 nvc0_hub_mmio_head
43.b16 nvc0_hub_mmio_tail
44.b8 0xc1 0 0 0 // the only entry that uses the longer nvc1 tail
45.b16 nvc0_hub_mmio_head
46.b16 nvc1_hub_mmio_tail
47.b8 0xc3 0 0 0
48.b16 nvc0_hub_mmio_head
49.b16 nvc0_hub_mmio_tail
50.b8 0xc4 0 0 0
51.b16 nvc0_hub_mmio_head
52.b16 nvc0_hub_mmio_tail
53.b8 0xc8 0 0 0
54.b16 nvc0_hub_mmio_head
55.b16 nvc0_hub_mmio_tail
56.b8 0xce 0 0 0
57.b16 nvc0_hub_mmio_head
58.b16 nvc0_hub_mmio_tail
59.b8 0 0 0 0 // terminator: init gives up when it reads an id of 0
60
61nvc0_hub_mmio_head: // list head shared by every chipset entry; args look like (register, count) - TODO confirm against nvc0_graph.fuc
62mmctx_data(0x17e91c, 2)
63mmctx_data(0x400204, 2)
64mmctx_data(0x404004, 11)
65mmctx_data(0x404044, 1)
66mmctx_data(0x404094, 14)
67mmctx_data(0x4040d0, 7)
68mmctx_data(0x4040f8, 1)
69mmctx_data(0x404130, 3)
70mmctx_data(0x404150, 3)
71mmctx_data(0x404164, 2)
72mmctx_data(0x404174, 3)
73mmctx_data(0x404200, 8)
74mmctx_data(0x404404, 14)
75mmctx_data(0x404460, 4)
76mmctx_data(0x404480, 1)
77mmctx_data(0x404498, 1)
78mmctx_data(0x404604, 4)
79mmctx_data(0x404618, 32)
80mmctx_data(0x404698, 21)
81mmctx_data(0x4046f0, 2)
82mmctx_data(0x404700, 22)
83mmctx_data(0x405800, 1)
84mmctx_data(0x405830, 3)
85mmctx_data(0x405854, 1)
86mmctx_data(0x405870, 4)
87mmctx_data(0x405a00, 2)
88mmctx_data(0x405a18, 1)
89mmctx_data(0x406020, 1)
90mmctx_data(0x406028, 4)
91mmctx_data(0x4064a8, 2)
92mmctx_data(0x4064b4, 2)
93mmctx_data(0x407804, 1)
94mmctx_data(0x40780c, 6)
95mmctx_data(0x4078bc, 1)
96mmctx_data(0x408000, 7)
97mmctx_data(0x408064, 1)
98mmctx_data(0x408800, 3)
99mmctx_data(0x408900, 4)
100mmctx_data(0x408980, 1)
101nvc0_hub_mmio_tail: // lists using this tail stop here; the next entry is only inside the list ending at the nvc1 tail
102mmctx_data(0x4064c0, 2)
103nvc1_hub_mmio_tail: // end of the list used by the 0xc1 chipset entry
104
105.align 256
106chan_data: // ctx_load transfers 256 bytes of the context here; the two words below sit at its start
107chan_mmio_count: .b32 0 // nonzero => ctx_xfer runs ctx_mmio_exec after a load; zeroed again by ctx_mmio_exec
108chan_mmio_address: .b32 0 // written to MEM_BASE by ctx_mmio_exec
109
110.align 256
111xfer_data: .b32 0 // start of the buffer used as xdld destination (16-byte channel header read, 256-byte mmio list chunks)
112
113.section nvc0_grhub_code
114bra init // first instruction of the code section: jump past the shared helper code pulled in below
115define(`include_code')
116include(`nvc0_graph.fuc')
117
118// reports an exception to the host: stores the code in CC_SCRATCH[5], then writes 1 to INTR_UP_SET
119//
120// In: $r15 error code (see nvc0_graph.fuc)
121// Out: $r15 is left holding 1; $r14 is saved and restored
122error:
123 push $r14 // $r14 is borrowed as the I/O address register
124 mov $r14 0x814
125 shl b32 $r14 6
126 iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
127 mov $r14 0xc1c
128 shl b32 $r14 6
129 mov $r15 1 // bit 0
130 iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
131 pop $r14
132 ret
133
134// HUB fuc initialisation, executed by triggering ucode start, will
135// fall through to main loop after completion.
136//
137// Input:
138// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
139//
140// Output:
141// CC_SCRATCH[0]:
142// 31:31: set to signal completion
143// CC_SCRATCH[1]:
144// 31:0: total PGRAPH context size
145//
146init:
147 clear b32 $r0 // $r0 stays zero; the rest of the file uses it as a zero base/value
148 mov $sp $r0
149 mov $xdbase $r0
150
151 // enable fifo access
152 mov $r1 0x1200
153 mov $r2 2
154 iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
155
156 // setup i0 handler, and route all interrupts to it
157 mov $r1 ih
158 mov $iv0 $r1
159 mov $r1 0x400 // $r1 is reused below for INTR_EN_SET (0x400) and the mode register (0x300)
160 iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
161
162 // route HUB_CHANNEL_SWITCH to fuc interrupt 8
163 mov $r3 0x404
164 shl b32 $r3 6
165 mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
166 iowr I[$r3 + 0x000] $r2
167
168 // not sure what these are, route them because NVIDIA does, and
169 // the IRQ handler will signal the host if we ever get one.. we
170 // may find out if/why we need to handle these if so..
171 //
172 mov $r2 0x2004
173 iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
174 mov $r2 0x200b
175 iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
176 mov $r2 0x200c
177 iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
178
179 // enable all INTR_UP interrupts
180 mov $r2 0xc24
181 shl b32 $r2 6
182 not b32 $r3 $r0 // $r3 = 0xffffffff
183 iowr I[$r2] $r3
184
185 // enable fifo, ctxsw, 9, 10, 15 interrupts
186 mov $r2 -0x78fc // 0x8704
187 sethi $r2 0
188 iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
189
190 // fifo level triggered, rest edge
191 sub b32 $r1 0x100 // $r1: 0x400 -> 0x300
192 mov $r2 4 // bit 2 is the fifo bit tested by ih
193 iowr I[$r1] $r2
194
195 // enable interrupts
196 bset $flags ie0
197
198 // fetch enabled GPC/ROP counts
199 mov $r14 -0x69fc // 0x409604
200 sethi $r14 0x400000
201 call nv_rd32 // the value read is consumed from $r15 below
202 extr $r1 $r15 16:20
203 st b32 D[$r0 + rop_count] $r1
204 and $r15 0x1f
205 st b32 D[$r0 + gpc_count] $r15
206
207 // set BAR_REQMASK to GPC mask
208 mov $r1 1
209 shl b32 $r1 $r15
210 sub b32 $r1 1 // $r1 = (1 << gpc_count) - 1
211 mov $r2 0x40c
212 shl b32 $r2 6
213 iowr I[$r2 + 0x000] $r1
214 iowr I[$r2 + 0x100] $r1
215
216 // find context data for this chipset
217 mov $r2 0x800
218 shl b32 $r2 6
219 iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
220 mov $r15 chipsets - 8 // start one entry early; the loop adds 8 before each compare
221 init_find_chipset:
222 add b32 $r15 8
223 ld b32 $r3 D[$r15 + 0x00]
224 cmpu b32 $r3 $r2
225 bra e init_context
226 cmpu b32 $r3 0 // an id of 0 marks the end of the table
227 bra ne init_find_chipset
228 // unknown chipset
229 ret // NOTE(review): init is reached via bra with $sp = 0, so no call pushed a return address for this ret - confirm intended behaviour
230
231 // context size calculation, reserve first 256 bytes for use by fuc
232 init_context:
233 mov $r1 256 // $r1 = running context size from here on
234
235 // calculate size of mmio context data
236 ld b16 $r14 D[$r15 + 4]
237 ld b16 $r15 D[$r15 + 6]
238 sethi $r14 0 // zero the upper 16 bits of $r14 (the b16 load presumably leaves them untouched - TODO confirm)
239 st b32 D[$r0 + hub_mmio_list_head] $r14
240 st b32 D[$r0 + hub_mmio_list_tail] $r15
241 call mmctx_size // the size is taken from $r15 below
242
243 // set mmctx base addresses now so we don't have to do it later,
244 // they don't (currently) ever change
245 mov $r3 0x700
246 shl b32 $r3 6
247 shr b32 $r4 $r1 8 // base = running size / 256
248 iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
249 iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
250 add b32 $r3 0x1300
251 add b32 $r1 $r15 // account for the mmio context data
252 shr b32 $r15 2 // size / 4
253 iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
254
255 // strands, base offset needs to be aligned to 256 bytes
256 shr b32 $r1 8
257 add b32 $r1 1 // note: moves to the next 256-byte boundary even if already aligned
258 shl b32 $r1 8
259 mov b32 $r15 $r1 // in: $r15 = strand base offset
260 call strand_ctx_init
261 add b32 $r1 $r15 // out: $r15 is added to the running size
262
263 // initialise each GPC in sequence by passing in the offset of its
264 // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
265 // has previously been uploaded by the host) running.
266 //
267 // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
268 // when it has completed, and return the size of its context data
269 // in GPCn_CC_SCRATCH[1]
270 //
271 ld b32 $r3 D[$r0 + gpc_count] // $r3 = GPCs left to start
272 mov $r4 0x2000
273 sethi $r4 0x500000 // $r4 = 0x502000, advanced by 0x8000 per GPC below
274 init_gpc:
275 // setup, and start GPC ucode running
276 add b32 $r14 $r4 0x804
277 mov b32 $r15 $r1
278 call nv_wr32 // CC_SCRATCH[1] = ctx offset
279 add b32 $r14 $r4 0x800
280 mov b32 $r15 $r2 // $r2 still holds the chipset read from CC_SCRATCH[0]
281 call nv_wr32 // CC_SCRATCH[0] = chipset
282 add b32 $r14 $r4 0x10c
283 clear b32 $r15 // 0 is written to +0x10c and then to ENTRY
284 call nv_wr32
285 add b32 $r14 $r4 0x104
286 call nv_wr32 // ENTRY
287 add b32 $r14 $r4 0x100
288 mov $r15 2 // CTRL_START_TRIGGER
289 call nv_wr32 // CTRL
290
291 // wait for it to complete, and adjust context size
292 add b32 $r14 $r4 0x800
293 init_gpc_wait:
294 call nv_rd32
295 xbit $r15 $r15 31
296 bra e init_gpc_wait // bit 31 still clear: keep polling
297 add b32 $r14 $r4 0x804
298 call nv_rd32
299 add b32 $r1 $r15 // add this GPC's reported context size
300
301 // next!
302 add b32 $r4 0x8000
303 sub b32 $r3 1 // NOTE(review): a gpc_count of 0 would wrap here - presumably never the case on real hardware
304 bra ne init_gpc
305
306 // save context size, and tell host we're ready
307 mov $r2 0x800
308 shl b32 $r2 6
309 iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
310 add b32 $r2 0x800 // $r2: 0x20000 -> 0x20800, the same address main_done writes
311 clear b32 $r1
312 bset $r1 31
313 iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
314
315// Main program loop, very simple, sleeps until woken up by the interrupt
316// handler, pulls a command from the queue and executes its handler
317//
318main:
319 // sleep until we have something to do
320 bset $flags $p0
321 sleep $p0 // ih clears $p0 on its way out
322 mov $r13 cmd_queue
323 call queue_get // command arrives in $r14, data in $r15 (as queued by ih)
324 bra $p1 main // presumably $p1 means nothing was dequeued - TODO confirm against queue_get
325
326 // context switch, requested by GPU?
327 cmpu b32 $r14 0x4001 // 0x4001 is what ih queues for a ctxsw interrupt
328 bra ne main_not_ctx_switch
329 trace_set(T_AUTO)
330 mov $r1 0xb00
331 shl b32 $r1 6
332 iord $r2 I[$r1 + 0x100] // CHAN_NEXT
333 iord $r1 I[$r1 + 0x000] // CHAN_CUR
334
335 xbit $r3 $r1 31
336 bra e chsw_no_prev // bit 31 clear in CHAN_CUR
337 xbit $r3 $r2 31
338 bra e chsw_prev_no_next // bit 31 clear in CHAN_NEXT
339 push $r2 // keep CHAN_NEXT across the save
340 mov b32 $r2 $r1
341 trace_set(T_SAVE)
342 bclr $flags $p1 // save...
343 bset $flags $p2 // ...with a load to follow
344 call ctx_xfer
345 trace_clr(T_SAVE);
346 pop $r2
347 trace_set(T_LOAD);
348 bset $flags $p1 // load; $p2 is left as set above
349 call ctx_xfer
350 trace_clr(T_LOAD);
351 bra chsw_done
352 chsw_prev_no_next:
353 push $r2
354 mov b32 $r2 $r1
355 bclr $flags $p1 // standalone save
356 bclr $flags $p2
357 call ctx_xfer
358 pop $r2
359 mov $r1 0xb00
360 shl b32 $r1 6
361 iowr I[$r1] $r2 // CHAN_CUR = the CHAN_NEXT value read earlier
362 bra chsw_done
363 chsw_no_prev:
364 xbit $r3 $r2 31
365 bra e chsw_done // neither channel has bit 31 set: just ack
366 bset $flags $p1 // standalone load of CHAN_NEXT (already in $r2)
367 bclr $flags $p2
368 call ctx_xfer
369
370 // ack the context switch request
371 chsw_done:
372 mov $r1 0xb0c
373 shl b32 $r1 6
374 mov $r2 1
375 iowr I[$r1 + 0x000] $r2 // 0x409b0c
376 trace_clr(T_AUTO)
377 bra main
378
379 // request to set current channel? (*not* a context switch)
380 main_not_ctx_switch:
381 cmpu b32 $r14 0x0001
382 bra ne main_not_ctx_chan
383 mov b32 $r2 $r15 // command data is passed on as the channel address
384 call ctx_chan
385 bra main_done
386
387 // request to store current channel context?
388 main_not_ctx_chan:
389 cmpu b32 $r14 0x0002
390 bra ne main_not_ctx_save
391 trace_set(T_SAVE)
392 bclr $flags $p1 // standalone save; $r2 is not set up here and the save path overwrites it before use
393 bclr $flags $p2
394 call ctx_xfer
395 trace_clr(T_SAVE)
396 bra main_done
397
398 main_not_ctx_save:
399 shl b32 $r15 $r14 16 // unknown command goes in the upper half of the error value
400 or $r15 E_BAD_COMMAND
401 call error
402 bra main
403
404 main_done:
405 mov $r1 0x820
406 shl b32 $r1 6
407 clear b32 $r2
408 bset $r2 31
409 iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
410 bra main
411
412// interrupt handler: queues fifo commands and ctxsw requests for main, forwards anything else to the host, then acks
413ih:
414 push $r8
415 mov $r8 $flags
416 push $r8 // saved $flags
417 push $r9
418 push $r10
419 push $r11
420 push $r13
421 push $r14
422 push $r15
423
424 // incoming fifo command?
425 iord $r10 I[$r0 + 0x200] // INTR
426 and $r11 $r10 0x00000004
427 bra e ih_no_fifo
428 // queue incoming fifo command for later processing
429 mov $r11 0x1900
430 mov $r13 cmd_queue
431 iord $r14 I[$r11 + 0x100] // FIFO_CMD
432 iord $r15 I[$r11 + 0x000] // FIFO_DATA
433 call queue_put
434 add b32 $r11 0x400
435 mov $r14 1
436 iowr I[$r11 + 0x000] $r14 // FIFO_ACK
437
438 // context switch request?
439 ih_no_fifo:
440 and $r11 $r10 0x00000100
441 bra e ih_no_ctxsw
442 // enqueue a context switch for later processing
443 mov $r13 cmd_queue
444 mov $r14 0x4001
445 call queue_put
446
447 // anything we didn't handle, bring it to the host's attention
448 ih_no_ctxsw:
449 mov $r11 0x104
450 not b32 $r11
451 and $r11 $r10 $r11 // $r11 = pending bits other than fifo (0x004) and ctxsw (0x100)
452 bra e ih_no_other
453 mov $r9 0xc1c // build the address in $r9 (saved above) so $r10 keeps the INTR value for the ack
454 shl b32 $r9 6
455 iowr I[$r9] $r11 // INTR_UP_SET
456
457 // ack, and wake up main()
458 ih_no_other:
459 iowr I[$r0 + 0x100] $r10 // INTR_ACK, $r10 = the INTR value read on entry
460
461 pop $r15
462 pop $r14
463 pop $r13
464 pop $r11
465 pop $r10
466 pop $r9
467 pop $r8
468 mov $flags $r8
469 pop $r8
470 bclr $flags $p0 // done after restoring $flags so main's sleep on $p0 ends
471 iret
472
473// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done. Writes 1 to 0x404160 and waits for bit 4 to read back set.
474ctx_4160s:
475 mov $r14 0x4160
476 sethi $r14 0x400000 // $r14 = 0x404160
477 mov $r15 1
478 call nv_wr32
479 ctx_4160s_wait:
480 call nv_rd32 // re-reads the address still in $r14
481 xbit $r15 $r15 4
482 bra e ctx_4160s_wait // bit 4 clear: keep polling
483 ret
484
485// Without clearing again at end of xfer, some things cause PGRAPH
486// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
487// still function with it set however...
488ctx_4160c:
489 mov $r14 0x4160
490 sethi $r14 0x400000 // $r14 = 0x404160
491 clear b32 $r15
492 call nv_wr32 // 0x404160 = 0, no wait afterwards
493 ret
494
495// Again, not real sure
496//
497// In: $r15 value to set 0x404170 to
498// Bit 4 (0x10) is always OR'd into the value before it is written
499ctx_4170s:
500 mov $r14 0x4170
501 sethi $r14 0x400000 // $r14 = 0x404170
502 or $r15 0x10 // bit 4 is the bit ctx_4170w waits on
503 call nv_wr32
504 ret
505
506// Waits for a ctx_4170s() call to complete
507// by polling 0x404170 until bit 4 (0x10) reads back clear
508ctx_4170w:
509 mov $r14 0x4170
510 sethi $r14 0x400000 // $r14 = 0x404170
511 call nv_rd32
512 and $r15 0x10
513 bra ne ctx_4170w // bit 4 still set: read again
514 ret
515
516// Disables various things, waits a bit, and re-enables them..
517//
518// Not sure how exactly this helps, perhaps "ENABLE" is not such a
519// good description for the bits we turn off? Anyways, without this,
520// funny things happen.
521// Uses $r14 (I/O address) and $r15 (value / delay counter).
522ctx_redswitch:
523 mov $r14 0x614
524 shl b32 $r14 6
525 mov $r15 0x270
526 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
527 mov $r15 8 // short busy-wait: 8 loop iterations
528 ctx_redswitch_delay:
529 sub b32 $r15 1
530 bra ne ctx_redswitch_delay
531 mov $r15 0x770
532 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
533 ret
534
535// Not a clue what this is for, except that unless the value is 0x10, the
536// strand context is saved (and presumably restored) incorrectly..
537//
538// In: $r15 value to set to (0x00/0x10 are used)
539// The same value goes to three places; the two nv_wr32 calls rely on $r15 being unchanged between them
540ctx_86c:
541 mov $r14 0x86c
542 shl b32 $r14 6
543 iowr I[$r14] $r15 // HUB(0x86c) = val
544 mov $r14 -0x75ec
545 sethi $r14 0x400000 // $r14 = 0x408a14
546 call nv_wr32 // ROP(0xa14) = val
547 mov $r14 -0x5794
548 sethi $r14 0x410000 // $r14 = 0x41a86c
549 call nv_wr32 // GPC(0x86c) = val
550 ret
551
552// ctx_load - loads a channel's ctxctl data, and selects its vm
553//
554// In: $r2 channel address
555// Out: ctx_current updated, chan_data filled with 256 bytes of the context; uses $r1-$r4 and $r10
556ctx_load:
557 trace_set(T_CHAN)
558
559 // switch to channel, somewhat magic in parts..
560 mov $r10 12 // DONE_UNK12
561 call wait_donez
562 mov $r1 0xa24
563 shl b32 $r1 6
564 iowr I[$r1 + 0x000] $r0 // 0x409a24
565 mov $r3 0xb00
566 shl b32 $r3 6
567 iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
568 mov $r1 0xa0c
569 shl b32 $r1 6
570 mov $r4 7
571 iowr I[$r1 + 0x000] $r2 // MEM_CHAN
572 iowr I[$r1 + 0x100] $r4 // MEM_CMD
573 ctx_chan_wait_0:
574 iord $r4 I[$r1 + 0x100]
575 and $r4 0x1f
576 bra ne ctx_chan_wait_0 // poll until the low 5 bits of MEM_CMD read zero
577 iowr I[$r3 + 0x000] $r2 // CHAN_CUR
578
579 // load channel header, fetch PGRAPH context pointer
580 mov $xtargets $r0
581 bclr $r2 31 // drop bit 31 before forming the base
582 shl b32 $r2 4
583 add b32 $r2 2 // with the 0x10 offset below this addresses chan + 0x0210
584
585 trace_set(T_LCHAN)
586 mov $r1 0xa04
587 shl b32 $r1 6
588 iowr I[$r1 + 0x000] $r2 // MEM_BASE
589 mov $r1 0xa20
590 shl b32 $r1 6
591 mov $r2 0x0002
592 sethi $r2 0x80000000
593 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
594 mov $r1 0x10 // chan + 0x0210
595 mov $r2 xfer_data
596 sethi $r2 0x00020000 // 16 bytes
597 xdld $r1 $r2
598 xdwait
599 trace_clr(T_LCHAN)
600
601 // update current context
602 ld b32 $r1 D[$r0 + xfer_data + 4]
603 shl b32 $r1 24
604 ld b32 $r2 D[$r0 + xfer_data + 0]
605 shr b32 $r2 8
606 or $r1 $r2 // $r1 = (word at +4 << 24) | (word at +0 >> 8)
607 st b32 D[$r0 + ctx_current] $r1
608
609 // set transfer base to start of context, and fetch context header
610 trace_set(T_LCTXH)
611 mov $r2 0xa04
612 shl b32 $r2 6
613 iowr I[$r2 + 0x000] $r1 // MEM_BASE
614 mov $r2 1
615 mov $r1 0xa20
616 shl b32 $r1 6
617 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
618 mov $r1 chan_data
619 sethi $r1 0x00060000 // 256 bytes
620 xdld $r0 $r1 // offset 0 of the context -> chan_data
621 xdwait
622 trace_clr(T_LCTXH)
623
624 trace_clr(T_CHAN)
625 ret
626
627// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
628// the active channel for ctxctl, but not actually transfer
629// any context data. intended for use only during initial
630// context construction.
631//
632// In: $r2 channel address
633// Sequence: ctx_4160s, ctx_load, MEM_CMD 5 (polled until it reads zero), ctx_4160c
634ctx_chan:
635 call ctx_4160s
636 call ctx_load
637 mov $r10 12 // DONE_UNK12
638 call wait_donez
639 mov $r1 0xa10
640 shl b32 $r1 6
641 mov $r2 5
642 iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
643 ctx_chan_wait:
644 iord $r2 I[$r1 + 0x000]
645 or $r2 $r2 // only sets the flags for the branch below
646 bra ne ctx_chan_wait // loop until MEM_CMD reads zero
647 call ctx_4160c
648 ret
649
650// Execute per-context state overrides list
651//
652// Only executed on the first load of a channel. Might want to look into
653// removing this and having the host directly modify the channel's context
654// to change this state... The nouveau DRM already builds this list as
655// it's definitely needed for NVIDIA's, so we may as well use it for now
656//
657// Input: $r1 mmio list length (number of 8-byte entries: address word, then value word)
658//
659ctx_mmio_exec:
660 // set transfer base to be the mmio list
661 ld b32 $r3 D[$r0 + chan_mmio_address]
662 mov $r2 0xa04
663 shl b32 $r2 6
664 iowr I[$r2 + 0x000] $r3 // MEM_BASE
665
666 clear b32 $r3 // $r3 = byte offset into the list
667 ctx_mmio_loop:
668 // fetch next 256 bytes of mmio list if necessary
669 and $r4 $r3 0xff // $r4 = offset of this entry inside the 256-byte buffer
670 bra ne ctx_mmio_pull // nonzero: entry is already in xfer_data
671 mov $r5 xfer_data
672 sethi $r5 0x00060000 // 256 bytes
673 xdld $r3 $r5
674 xdwait
675
676 // execute a single list entry
677 ctx_mmio_pull:
678 ld b32 $r14 D[$r4 + xfer_data + 0x00]
679 ld b32 $r15 D[$r4 + xfer_data + 0x04]
680 call nv_wr32 // write the value word to the address word
681
682 // next!
683 add b32 $r3 8
684 sub b32 $r1 1
685 bra ne ctx_mmio_loop
686
687 // set transfer base back to the current context
688 ctx_mmio_done:
689 ld b32 $r3 D[$r0 + ctx_current]
690 iowr I[$r2 + 0x000] $r3 // MEM_BASE
691
692 // disable the mmio list now, we don't need/want to execute it again
693 st b32 D[$r0 + chan_mmio_count] $r0
694 mov $r1 chan_data
695 sethi $r1 0x00060000 // 256 bytes
696 xdst $r0 $r1 // store chan_data, now with a zero count, back at offset 0
697 xdwait
698 ret
699
700// Transfer HUB context data between GPU and storage area
701//
702// In: $r2 channel address
703// $p1 clear on save, set on load
704// $p2 set if opposite direction done/will be done, so:
705// on save it means: "a load will follow this save"
706// on load it means: "a save preceded this load"
707//
708ctx_xfer:
709 bra not $p1 ctx_xfer_pre // save: always run the pre-amble
710 bra $p2 ctx_xfer_pre_load // load right after a save: skip the pre-amble
711 ctx_xfer_pre:
712 mov $r15 0x10
713 call ctx_86c
714 call ctx_4160s
715 bra not $p1 ctx_xfer_exec // save: nothing to load, go transfer
716
717 ctx_xfer_pre_load:
718 mov $r15 2
719 call ctx_4170s
720 call ctx_4170w
721 call ctx_redswitch
722 clear b32 $r15
723 call ctx_4170s
724 call ctx_load // consumes the channel address in $r2
725
726 // fetch context pointer, and initiate xfer on all GPCs
727 ctx_xfer_exec:
728 ld b32 $r1 D[$r0 + ctx_current]
729 mov $r2 0x414
730 shl b32 $r2 6
731 iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
732 mov $r14 -0x5b00
733 sethi $r14 0x410000 // $r14 = 0x41a500
734 mov b32 $r15 $r1
735 call nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
736 add b32 $r14 4
737 xbit $r15 $flags $p1
738 xbit $r2 $flags $p2
739 shl b32 $r2 1
740 or $r15 $r2 // type = $p1 | ($p2 << 1)
741 call nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
742
743 // strands
744 mov $r1 0x4afc
745 sethi $r1 0x20000
746 mov $r2 0xc
747 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
748 call strand_wait
749 mov $r2 0x47fc
750 sethi $r2 0x20000
751 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
752 xbit $r2 $flags $p1
753 add b32 $r2 3 // 3 + $p1
754 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
755
756 // mmio context
757 xbit $r10 $flags $p1 // direction
758 or $r10 6 // first, last
759 mov $r11 0 // base = 0
760 ld b32 $r12 D[$r0 + hub_mmio_list_head]
761 ld b32 $r13 D[$r0 + hub_mmio_list_tail]
762 mov $r14 0 // not multi
763 call mmctx_xfer
764
765 // wait for GPCs to all complete
766 mov $r10 8 // DONE_BAR
767 call wait_doneo
768
769 // wait for strand xfer to complete
770 call strand_wait
771
772 // post-op
773 bra $p1 ctx_xfer_post // load: skip the save-only MEM_CMD step
774 mov $r10 12 // DONE_UNK12
775 call wait_donez
776 mov $r1 0xa10
777 shl b32 $r1 6
778 mov $r2 5
779 iowr I[$r1] $r2 // MEM_CMD
780 ctx_xfer_post_save_wait:
781 iord $r2 I[$r1]
782 or $r2 $r2 // only sets the flags for the branch below
783 bra ne ctx_xfer_post_save_wait // loop until MEM_CMD reads zero
784
785 bra $p2 ctx_xfer_done // save with a load to follow: the load runs the post steps
786 ctx_xfer_post:
787 mov $r15 2
788 call ctx_4170s
789 clear b32 $r15
790 call ctx_86c
791 call strand_post
792 call ctx_4170w
793 clear b32 $r15
794 call ctx_4170s
795
796 bra not $p1 ctx_xfer_no_post_mmio // the mmio list is only considered on load
797 ld b32 $r1 D[$r0 + chan_mmio_count]
798 or $r1 $r1
799 bra e ctx_xfer_no_post_mmio // count of zero: nothing to execute
800 call ctx_mmio_exec // $r1 = list length
801
802 ctx_xfer_no_post_mmio:
803 call ctx_4160c
804
805 ctx_xfer_done:
806 ret
807
808.align 256