aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/nouveau/nva3_copy.fuc
diff options
context:
space:
mode:
author Ben Skeggs <bskeggs@redhat.com> 2012-07-04 09:44:54 -0400
committer Ben Skeggs <bskeggs@redhat.com> 2012-10-02 23:12:43 -0400
commit 02a841d434513c7b3620250271c372fabce56de5 (patch)
tree 464e7651bc65e8b100ad9eb949729da3d491591a /drivers/gpu/drm/nouveau/nva3_copy.fuc
parent 3a92d37e4099054fe187b485a9d27c439c10eca7 (diff)
drm/nouveau: restructure source tree, split core from drm implementation
Future work will be headed in the way of separating the policy supplied by the nouveau drm module from the mechanisms provided by the driver core. There will be a couple of major classes (subdev, engine) of driver modules that have clearly defined tasks, and the further directory structure change is to reflect this. No code changes here whatsoever, aside from fixing up a couple of include file pathnames. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/nouveau/nva3_copy.fuc')
-rw-r--r--drivers/gpu/drm/nouveau/nva3_copy.fuc872
1 files changed, 0 insertions, 872 deletions
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc b/drivers/gpu/drm/nouveau/nva3_copy.fuc
deleted file mode 100644
index 219850d53286..000000000000
--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc
+++ /dev/null
@@ -1,872 +0,0 @@
1/* fuc microcode for copy engine on nva3- chipsets
2 *
3 * Copyright 2011 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: Ben Skeggs
24 */
25
26/* To build for nva3:nvc0
27 * m4 -DNVA3 nva3_copy.fuc | envyas -a -w -m fuc -V nva3 -o nva3_copy.fuc.h
28 *
29 * To build for nvc0-
30 * m4 -DNVC0 nva3_copy.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_copy.fuc.h
31 */
32
33ifdef(`NVA3',
34.section #nva3_pcopy_data
35,
36.section #nvc0_pcopy_data
37)
38
39ctx_object: .b32 0 // first word of the data segment; swctx saves and restores 256 bytes starting here
40ifdef(`NVA3',
41ctx_dma:
42ctx_dma_query: .b32 0
43ctx_dma_src: .b32 0
44ctx_dma_dst: .b32 0
45,)
46.equ #ctx_dma_count 3 // number of ctx_dma_* words declared above
47ctx_query_address_high: .b32 0
48ctx_query_address_low: .b32 0
49ctx_query_counter: .b32 0
50ctx_src_address_high: .b32 0 // start of the src field group; the dst group below repeats the same field order
51ctx_src_address_low: .b32 0
52ctx_src_pitch: .b32 0
53ctx_src_tile_mode: .b32 0
54ctx_src_xsize: .b32 0
55ctx_src_ysize: .b32 0
56ctx_src_zsize: .b32 0
57ctx_src_zoff: .b32 0
58ctx_src_xoff: .b32 0
59ctx_src_yoff: .b32 0
60ctx_src_cpp: .b32 0 // not a method target: written by cmd_exec and cmd_exec_set_format
61ctx_dst_address_high: .b32 0 // cmd_exec reaches dst fields as src label + (#ctx_dst_address_high - #ctx_src_address_high)
62ctx_dst_address_low: .b32 0
63ctx_dst_pitch: .b32 0
64ctx_dst_tile_mode: .b32 0
65ctx_dst_xsize: .b32 0
66ctx_dst_ysize: .b32 0
67ctx_dst_zsize: .b32 0
68ctx_dst_zoff: .b32 0
69ctx_dst_xoff: .b32 0
70ctx_dst_yoff: .b32 0
71ctx_dst_cpp: .b32 0 // not a method target: written by cmd_exec and cmd_exec_set_format
72ctx_format: .b32 0
73ctx_swz_const0: .b32 0
74ctx_swz_const1: .b32 0
75ctx_xcnt: .b32 0
76ctx_ycnt: .b32 0
77.align 256
78
79dispatch_table: // walked by dispatch; each range is .b16 first method slot (mthd >> 2) and .b16 entry count, then 8-byte entries
80// mthd 0x0000, NAME
81.b16 0x000 1
82.b32 #ctx_object ~0xffffffff // state entry: ctx address to store the data at, then mask of data bits that must be zero
83// mthd 0x0100, NOP
84.b16 0x040 1
85.b32 0x00010000 + #cmd_nop ~0xffffffff // nonzero upper 16 bits mark a handler entry: dispatch calls the low 16 bits
86// mthd 0x0140, PM_TRIGGER
87.b16 0x050 1
88.b32 0x00010000 + #cmd_pm_trigger ~0xffffffff
89ifdef(`NVA3', `
90// mthd 0x0180-0x0188, DMA_
91.b16 0x060 #ctx_dma_count
92dispatch_dma:
93.b32 0x00010000 + #cmd_dma ~0xffffffff
94.b32 0x00010000 + #cmd_dma ~0xffffffff
95.b32 0x00010000 + #cmd_dma ~0xffffffff
96',)
97// mthd 0x0200-0x0218, SRC_TILE
98.b16 0x80 7
99.b32 #ctx_src_tile_mode ~0x00000fff
100.b32 #ctx_src_xsize ~0x0007ffff
101.b32 #ctx_src_ysize ~0x00001fff
102.b32 #ctx_src_zsize ~0x000007ff
103.b32 #ctx_src_zoff ~0x00000fff
104.b32 #ctx_src_xoff ~0x0007ffff
105.b32 #ctx_src_yoff ~0x00001fff
106// mthd 0x0220-0x0238, DST_TILE
107.b16 0x88 7
108.b32 #ctx_dst_tile_mode ~0x00000fff
109.b32 #ctx_dst_xsize ~0x0007ffff
110.b32 #ctx_dst_ysize ~0x00001fff
111.b32 #ctx_dst_zsize ~0x000007ff
112.b32 #ctx_dst_zoff ~0x00000fff
113.b32 #ctx_dst_xoff ~0x0007ffff
114.b32 #ctx_dst_yoff ~0x00001fff
115// mthd 0x0300-0x0304, EXEC, WRCACHE_FLUSH
116.b16 0xc0 2
117.b32 0x00010000 + #cmd_exec ~0xffffffff
118.b32 0x00010000 + #cmd_wrcache_flush ~0xffffffff
119// mthd 0x030c-0x0340, various stuff
120.b16 0xc3 14
121.b32 #ctx_src_address_high ~0x000000ff
122.b32 #ctx_src_address_low ~0xffffffff
123.b32 #ctx_dst_address_high ~0x000000ff
124.b32 #ctx_dst_address_low ~0xffffffff
125.b32 #ctx_src_pitch ~0x0007ffff
126.b32 #ctx_dst_pitch ~0x0007ffff
127.b32 #ctx_xcnt ~0x0000ffff
128.b32 #ctx_ycnt ~0x00001fff
129.b32 #ctx_format ~0x0333ffff
130.b32 #ctx_swz_const0 ~0xffffffff
131.b32 #ctx_swz_const1 ~0xffffffff
132.b32 #ctx_query_address_high ~0x000000ff
133.b32 #ctx_query_address_low ~0xffffffff
134.b32 #ctx_query_counter ~0xffffffff
135.b16 0x800 0 // terminator: dispatch masks the method slot to 0x7ff so every lookup that misses ends here as illegal
136
137ifdef(`NVA3',
138.section #nva3_pcopy_code
139,
140.section #nvc0_pcopy_code
141)
142
143main: // entry point: initialises the stack pointer and interrupt routing then sleeps until interrupted
144 clear b32 $r0 // $r0 = 0; the rest of the file uses $r0 as a zero base and zero source
145 mov $sp $r0
146
147 // setup i0 handler and route fifo and ctxswitch to it
148 mov $r1 #ih
149 mov $iv0 $r1
150 mov $r1 0x400
151 movw $r2 0xfff3
152 sethi $r2 0 // presumably leaves $r2 = 0x0000fff3 - confirm against the falcon ISA reference
153 iowr I[$r1 + 0x300] $r2 // I/O 0x700
154
155 // enable interrupts
156 or $r2 0xc // adds bits 2 and 3 which are the two sources ih tests for
157 iowr I[$r1] $r2 // I/O 0x400
158 bset $flags ie0
159
160 // enable fifo access and context switching
161 mov $r1 0x1200
162 mov $r2 3
163 iowr I[$r1] $r2
164
165 // sleep forever, waking for interrupts
166 bset $flags $p0
167 spin:
168 sleep $p0
169 bra #spin
170
171// i0 handler
172ih: // interrupt vector 0 handler installed by main; $r1 holds the interrupt bits across both calls
173 iord $r1 I[$r0 + 0x200] // $r1 = value read from I/O 0x200
174
175 and $r2 $r1 0x00000008 // bit 3 set: run the channel switch handler
176 bra e #ih_no_chsw
177 call #chsw
178 ih_no_chsw:
179 and $r2 $r1 0x00000004 // bit 2 set: run the fifo method dispatcher
180 bra e #ih_no_cmd
181 call #dispatch
182
183 ih_no_cmd:
184 and $r1 $r1 0x0000000c // keep only the two bits handled above
185 iowr I[$r0 + 0x100] $r1 // presumably acknowledges those interrupts - confirm register meaning
186 iret
187
188// $p1 direction (0 = unload, 1 = load)
189// $r3 channel
190swctx: // transfers the 256-byte context image; direction chosen by $p1 and channel taken from $r3
191 mov $r4 0x7700
192 mov $xtargets $r4
193ifdef(`NVA3', `
194 // target 7 hardcoded to ctx dma object
195 mov $xdbase $r0
196', ` // NVC0
197 // read SCRATCH3 to decide if we are PCOPY0 or PCOPY1
198 mov $r4 0x2100
199 iord $r4 I[$r4 + 0]
200 and $r4 1
201 shl b32 $r4 4
202 add b32 $r4 0x30 // $r4 = 0x30 or 0x40 depending on bit 0 read above
203
204 // channel is in vram
205 mov $r15 0x61c
206 shl b32 $r15 6 // I/O 0x18700
207 mov $r5 0x114
208 iowrs I[$r15] $r5
209
210 // read 16-byte PCOPYn info, containing context pointer, from channel
211 shl b32 $r5 $r3 4
212 add b32 $r5 2
213 mov $xdbase $r5
214 mov $r5 $sp
215 // get a chunk of stack space, aligned to 256 byte boundary
216 sub b32 $r5 0x100
217 mov $r6 0xff
218 not b32 $r6
219 and $r5 $r6
220 sethi $r5 0x00020000 // upper half presumably encodes the transfer size - confirm
221 xdld $r4 $r5
222 xdwait
223 sethi $r5 0 // drop the upper half so $r5 is a plain data address again
224
225 // set context pointer, from within channel VM
226 mov $r14 0
227 iowrs I[$r15] $r14
228 ld b32 $r4 D[$r5 + 0]
229 shr b32 $r4 8
230 ld b32 $r6 D[$r5 + 4]
231 shl b32 $r6 24
232 or $r4 $r6 // $r4 = bits 8..39 of the 64-bit value just fetched
233 mov $xdbase $r4
234')
235 // 256-byte context, at start of data segment
236 mov b32 $r4 $r0
237 sethi $r4 0x60000
238
239 // swap!
240 bra $p1 #swctx_load
241 xdst $r0 $r4 // $p1 clear: unload
242 bra #swctx_done
243 swctx_load:
244 xdld $r0 $r4 // $p1 set: load
245 swctx_done:
246 xdwait
247 ret
248
249chsw: // channel switch handler called from ih: unloads the active channel or loads the pending one
250 // read current channel
251 mov $r2 0x1400
252 iord $r3 I[$r2]
253
254 // if it's active, unload it and return
255 xbit $r15 $r3 0x1e // bit 30 marks the channel as active
256 bra e #chsw_no_unload
257 bclr $flags $p1 // $p1 = 0 selects unload in swctx
258 call #swctx
259 bclr $r3 0x1e
260 iowr I[$r2] $r3 // write the channel back with the active bit cleared
261 mov $r4 1
262 iowr I[$r2 + 0x200] $r4 // I/O 0x1600 = 1 on the unload path
263 ret
264
265 // read next channel
266 chsw_no_unload:
267 iord $r3 I[$r2 + 0x100]
268
269 // is there a channel waiting to be loaded?
270 xbit $r13 $r3 0x1e
271 bra e #chsw_finish_load
272 bset $flags $p1 // $p1 = 1 selects load in swctx
273 call #swctx
274ifdef(`NVA3',
275 // load dma objects back into TARGET regs
276 mov $r5 #ctx_dma
277 mov $r6 #ctx_dma_count - 1 // start at the last valid slot: the array holds slots 0 to count - 1
278 chsw_load_ctx_dma:
279 ld b32 $r7 D[$r5 + $r6 * 4]
280 add b32 $r8 $r6 0x180
281 shl b32 $r8 8 // same I/O address cmd_dma writes for this slot
282 iowr I[$r8] $r7
283 sub b32 $r6 1
284 bra nc #chsw_load_ctx_dma // the borrow out of slot 0 ends the loop
285,)
286
287 chsw_finish_load:
288 mov $r3 2
289 iowr I[$r2 + 0x200] $r3 // I/O 0x1600 = 2 on the load path
290 ret
291
292dispatch: // fifo method handler called from ih: looks the method up in dispatch_table then stores or executes it
293 // read incoming fifo command
294 mov $r3 0x1900
295 iord $r2 I[$r3 + 0x100] // $r2 = method word
296 iord $r3 I[$r3 + 0x000] // $r3 = method data
297 and $r4 $r2 0x7ff // $r4 = method slot compared against the table ranges
298 // $r2 will be used to store exception data
299 shl b32 $r2 0x10
300
301 // lookup method in the dispatch table, ILLEGAL_MTHD if not found
302 mov $r5 #dispatch_table
303 clear b32 $r6
304 clear b32 $r7
305 dispatch_loop:
306 ld b16 $r6 D[$r5 + 0] // $r6 = first slot of this range
307 ld b16 $r7 D[$r5 + 2] // $r7 = number of entries in this range
308 add b32 $r5 4 // $r5 now points at the first entry of the range
309 cmpu b32 $r4 $r6
310 bra c #dispatch_illegal_mthd // below the range start: ranges are sorted so the method is unknown
311 add b32 $r7 $r6
312 cmpu b32 $r4 $r7
313 bra c #dispatch_valid_mthd
314 sub b32 $r7 $r6
315 shl b32 $r7 3 // entries are 8 bytes each
316 add b32 $r5 $r7 // skip to the next range header
317 bra #dispatch_loop
318
319 // ensure no bits set in reserved fields, INVALID_BITFIELD
320 dispatch_valid_mthd:
321 sub b32 $r4 $r6
322 shl b32 $r4 3
323 add b32 $r4 $r5 // $r4 = address of the matching table entry
324 ld b32 $r5 D[$r4 + 4] // second word: mask of bits that must be zero
325 and $r5 $r3
326 cmpu b32 $r5 0
327 bra ne #dispatch_invalid_bitfield
328
329 // depending on dispatch flags: execute method, or save data as state
330 ld b16 $r5 D[$r4 + 0] // low half of first word: ctx address or handler address
331 ld b16 $r6 D[$r4 + 2] // high half of first word: nonzero for handler entries
332 cmpu b32 $r6 0
333 bra ne #dispatch_cmd
334 st b32 D[$r5] $r3
335 bra #dispatch_done
336 dispatch_cmd:
337 bclr $flags $p1 // handlers report an error by setting $p1
338 call $r5
339 bra $p1 #dispatch_error
340 bra #dispatch_done
341
342 dispatch_invalid_bitfield:
343 or $r2 2 // falls through so an invalid bitfield reports code 3
344 dispatch_illegal_mthd:
345 or $r2 1
346
347 // store exception data in SCRATCH0/SCRATCH1, signal hostirq
348 dispatch_error:
349 mov $r4 0x1000
350 iowr I[$r4 + 0x000] $r2
351 iowr I[$r4 + 0x100] $r3
352 mov $r2 0x40
353 iowr I[$r0] $r2
354 hostirq_wait: // spin until bit 6 of I/O 0x200 reads back as zero
355 iord $r2 I[$r0 + 0x200]
356 and $r2 0x40
357 cmpu b32 $r2 0
358 bra ne #hostirq_wait
359
360 dispatch_done:
361 mov $r2 0x1d00
362 mov $r3 1
363 iowr I[$r2] $r3 // presumably releases the fifo entry - confirm register meaning
364 ret
365
366// No-operation
367//
368// Inputs:
369// $r1: irqh state
370// $r2: hostirq state
371// $r3: data
372// $r4: dispatch table entry
373// Outputs:
374// $r1: irqh state
375// $p1: set on error
376// $r2: hostirq state
377// $r3: data
378cmd_nop: // handler for mthd 0x0100: does nothing and leaves $p1 clear
379 ret
380
381// PM_TRIGGER
382//
383// Inputs:
384// $r1: irqh state
385// $r2: hostirq state
386// $r3: data
387// $r4: dispatch table entry
388// Outputs:
389// $r1: irqh state
390// $p1: set on error
391// $r2: hostirq state
392// $r3: data
393cmd_pm_trigger: // handler for mthd 0x0140: one write to I/O 0x2200
394 mov $r2 0x2200 // NOTE(review): overwrites $r2 and $r3 although the header lists them as outputs; dispatch only reads them when $p1 is set
395 clear b32 $r3
396 sethi $r3 0x20000 // presumably leaves $r3 = 0x00020000 - confirm sethi semantics
397 iowr I[$r2] $r3
398 ret
399
400ifdef(`NVA3',
401// SET_DMA_* method handler
402//
403// Inputs:
404// $r1: irqh state
405// $r2: hostirq state
406// $r3: data
407// $r4: dispatch table entry
408// Outputs:
409// $r1: irqh state
410// $p1: set on error
411// $r2: hostirq state
412// $r3: data
413cmd_dma: // stores the DMA object in ctx_dma and programs the matching TARGET register
414 sub b32 $r4 #dispatch_dma
415 shr b32 $r4 1 // table entries are 8 bytes so this gives slot * 4
416 bset $r3 0x1e
417 st b32 D[$r4 + #ctx_dma] $r3
418 add b32 $r4 0x600
419 shl b32 $r4 6 // I/O address 0x18000 + slot * 0x100
420 iowr I[$r4] $r3
421 ret
422,)
423
424// Calculates the hw swizzle mask and adjusts the surface's xcnt to match
425//
426cmd_exec_set_format: // builds the byte swizzle from ctx_format, stores src/dst cpp and programs xcnt, cpp, swizzle and constants
427 // zero out a chunk of the stack to store the swizzle into
428 add $sp -0x10
429 st b32 D[$sp + 0x00] $r0
430 st b32 D[$sp + 0x04] $r0
431 st b32 D[$sp + 0x08] $r0
432 st b32 D[$sp + 0x0c] $r0
433
434 // extract cpp, src_ncomp and dst_ncomp from FORMAT
435 ld b32 $r4 D[$r0 + #ctx_format]
436 extr $r5 $r4 16:17
437 add b32 $r5 1 // $r5 = bytes per component (inner loop bound)
438 extr $r6 $r4 20:21
439 add b32 $r6 1 // $r6 = number of src components
440 extr $r7 $r4 24:25
441 add b32 $r7 1 // $r7 = number of dst components (outer loop bound)
442
443 // convert FORMAT swizzle mask to hw swizzle mask
444 bclr $flags $p2 // $p2 records whether any dst component selects a src component
445 clear b32 $r8 // $r8 = output byte position in the stack buffer
446 clear b32 $r9 // $r9 = dst component counter
447 ncomp_loop:
448 and $r10 $r4 0xf // $r10 = selector nibble for this dst component
449 shr b32 $r4 4
450 clear b32 $r11 // $r11 = byte number within the component
451 bpc_loop:
452 cmpu b8 $r10 4
453 bra nc #cmp_c0
454 mulu $r12 $r10 $r5 // selector 0..3: swizzle byte = src component * bytes per component + byte number
455 add b32 $r12 $r11
456 bset $flags $p2
457 bra #bpc_next
458 cmp_c0:
459 bra ne #cmp_c1 // still testing the flags of the compare against 4
460 mov $r12 0x10 // selector 4: swizzle byte = 0x10 + byte number (presumably bytes of SWZ_CONST0)
461 add b32 $r12 $r11
462 bra #bpc_next
463 cmp_c1:
464 cmpu b8 $r10 6
465 bra nc #cmp_zero
466 mov $r12 0x14 // selector 5: swizzle byte = 0x14 + byte number (presumably bytes of SWZ_CONST1)
467 add b32 $r12 $r11
468 bra #bpc_next
469 cmp_zero:
470 mov $r12 0x80 // selector 6 and above
471 bpc_next:
472 st b8 D[$sp + $r8] $r12
473 add b32 $r8 1
474 add b32 $r11 1
475 cmpu b32 $r11 $r5
476 bra c #bpc_loop
477 add b32 $r9 1
478 cmpu b32 $r9 $r7
479 bra c #ncomp_loop
480
481 // SRC_XCNT = (xcnt * src_cpp), or 0 if no src ref in swz (hw will hang)
482 mulu $r6 $r5 // src_cpp = src components * bytes per component
483 st b32 D[$r0 + #ctx_src_cpp] $r6
484 ld b32 $r8 D[$r0 + #ctx_xcnt]
485 mulu $r6 $r8
486 bra $p2 #dst_xcnt
487 clear b32 $r6
488
489 dst_xcnt:
490 mulu $r7 $r5 // dst_cpp = dst components * bytes per component
491 st b32 D[$r0 + #ctx_dst_cpp] $r7
492 mulu $r7 $r8
493
494 mov $r5 0x810
495 shl b32 $r5 6 // I/O 0x20400
496 iowr I[$r5 + 0x000] $r6 // src xcnt in bytes
497 iowr I[$r5 + 0x100] $r7 // dst xcnt in bytes
498 add b32 $r5 0x800 // I/O 0x20c00
499 ld b32 $r6 D[$r0 + #ctx_dst_cpp]
500 sub b32 $r6 1
501 shl b32 $r6 8
502 ld b32 $r7 D[$r0 + #ctx_src_cpp]
503 sub b32 $r7 1
504 or $r6 $r7 // ((dst_cpp - 1) << 8) | (src_cpp - 1)
505 iowr I[$r5 + 0x000] $r6
506 add b32 $r5 0x100 // I/O 0x20d00: four swizzle words follow
507 ld b32 $r6 D[$sp + 0x00]
508 iowr I[$r5 + 0x000] $r6
509 ld b32 $r6 D[$sp + 0x04]
510 iowr I[$r5 + 0x100] $r6
511 ld b32 $r6 D[$sp + 0x08]
512 iowr I[$r5 + 0x200] $r6
513 ld b32 $r6 D[$sp + 0x0c]
514 iowr I[$r5 + 0x300] $r6
515 add b32 $r5 0x400 // I/O 0x21100
516 ld b32 $r6 D[$r0 + #ctx_swz_const0]
517 iowr I[$r5 + 0x000] $r6
518 ld b32 $r6 D[$r0 + #ctx_swz_const1]
519 iowr I[$r5 + 0x100] $r6
520 add $sp 0x10 // release the swizzle buffer
521 ret
522
523// Setup to handle a tiled surface
524//
525// Calculates a number of parameters the hardware requires in order
526// to correctly handle tiling.
527//
528// Offset calculation is performed as follows (Tp/Th/Td from TILE_MODE):
529// nTx = round_up(w * cpp, 1 << Tp) >> Tp
530// nTy = round_up(h, 1 << Th) >> Th
531// Txo = (x * cpp) & ((1 << Tp) - 1)
532// Tx = (x * cpp) >> Tp
533// Tyo = y & ((1 << Th) - 1)
534// Ty = y >> Th
535// Tzo = z & ((1 << Td) - 1)
536// Tz = z >> Td
537//
538// off = (Tzo << Tp << Th) + (Tyo << Tp) + Txo
539// off += ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Td << Th << Tp;
540//
541// Inputs:
542// $r4: hw command (0x104800)
543// $r5: ctx offset adjustment for src/dst selection
544// $p2: set if dst surface
545//
546cmd_exec_set_surface_tiled: // programs YTILE, PITCH, ADDRESS_LOW and CFG_ADDRESS_HIGH for one tiled surface; $r5 and $p2 select src or dst
547 // translate TILE_MODE into Tp, Th, Td shift values
548 ld b32 $r7 D[$r5 + #ctx_src_tile_mode]
549 extr $r9 $r7 8:11
550 extr $r8 $r7 4:7
551ifdef(`NVA3',
552 add b32 $r8 2
553,
554 add b32 $r8 3
555)
556 extr $r7 $r7 0:3
557 cmp b32 $r7 0xe
558 bra ne #xtile64
559 mov $r7 4 // low nibble 0xe: Tp = 4
560 bra #xtileok
561 xtile64:
562 xbit $r7 $flags $p2
563 add b32 $r7 17
564 bset $r4 $r7 // sets bit 17 (src) or bit 18 (dst) in the hw command being built
565 mov $r7 6 // otherwise Tp = 6
566 xtileok: // here $r7 = Tp, $r8 = Th, $r9 = Td
567
568 // Op = (x * cpp) & ((1 << Tp) - 1)
569 // Tx = (x * cpp) >> Tp
570 ld b32 $r10 D[$r5 + #ctx_src_xoff]
571 ld b32 $r11 D[$r5 + #ctx_src_cpp]
572 mulu $r10 $r11
573 mov $r11 1
574 shl b32 $r11 $r7
575 sub b32 $r11 1
576 and $r12 $r10 $r11 // $r12 = Op
577 shr b32 $r10 $r7 // $r10 = Tx
578
579 // Tyo = y & ((1 << Th) - 1)
580 // Ty = y >> Th
581 ld b32 $r13 D[$r5 + #ctx_src_yoff]
582 mov $r14 1
583 shl b32 $r14 $r8
584 sub b32 $r14 1
585 and $r11 $r13 $r14 // $r11 = Tyo
586 shr b32 $r13 $r8 // $r13 = Ty
587
588 // YTILE = ((1 << Th) << 12) | ((1 << Th) - Tyo)
589 add b32 $r14 1
590 shl b32 $r15 $r14 12
591 sub b32 $r14 $r11
592 or $r15 $r14
593 xbit $r6 $flags $p2
594 add b32 $r6 0x208
595 shl b32 $r6 8 // I/O 0x20800 for src or 0x20900 for dst
596 iowr I[$r6 + 0x000] $r15
597
598 // Op += Tyo << Tp
599 shl b32 $r11 $r7
600 add b32 $r12 $r11
601
602 // nTx = ((w * cpp) + ((1 << Tp) - 1)) >> Tp
603 ld b32 $r15 D[$r5 + #ctx_src_xsize]
604 ld b32 $r11 D[$r5 + #ctx_src_cpp]
605 mulu $r15 $r11
606 mov $r11 1
607 shl b32 $r11 $r7
608 sub b32 $r11 1
609 add b32 $r15 $r11
610 shr b32 $r15 $r7
611 push $r15 // nTx is kept on the stack until the Ot calculation
612
613 // nTy = (h + ((1 << Th) - 1)) >> Th
614 ld b32 $r15 D[$r5 + #ctx_src_ysize]
615 mov $r11 1
616 shl b32 $r11 $r8
617 sub b32 $r11 1
618 add b32 $r15 $r11
619 shr b32 $r15 $r8
620 push $r15 // nTy is kept on the stack until the Ot calculation
621
622 // Tys = Tp + Th
623 // CFG_YZ_TILE_SIZE = ((1 << Th) >> 2) << Td
624 add b32 $r7 $r8 // $r7 = Tys
625 sub b32 $r8 2
626 mov $r11 1
627 shl b32 $r11 $r8
628 shl b32 $r11 $r9 // $r11 = CFG_YZ_TILE_SIZE, merged into CFG_ADDRESS_HIGH at the end
629
630 // Tzo = z & ((1 << Td) - 1)
631 // Tz = z >> Td
632 // Op += Tzo << Tys
633 // Ts = Tys + Td
634 ld b32 $r8 D[$r5 + #ctx_src_zoff]
635 mov $r14 1
636 shl b32 $r14 $r9
637 sub b32 $r14 1
638 and $r15 $r8 $r14
639 shl b32 $r15 $r7
640 add b32 $r12 $r15
641 add b32 $r7 $r9 // $r7 = Ts
642 shr b32 $r8 $r9 // $r8 = Tz
643
644 // Ot = ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Ts
645 pop $r15 // $r15 = nTy (pushed last)
646 pop $r9 // $r9 = nTx
647 mulu $r13 $r9
648 add b32 $r10 $r13
649 mulu $r8 $r9
650 mulu $r8 $r15
651 add b32 $r10 $r8
652 shl b32 $r10 $r7
653
654 // PITCH = (nTx - 1) << Ts
655 sub b32 $r9 1
656 shl b32 $r9 $r7
657 iowr I[$r6 + 0x200] $r9 // I/O 0x20a00 for src or 0x20b00 for dst
658
659 // SRC_ADDRESS_LOW = (Ot + Op) & 0xffffffff
660 // CFG_ADDRESS_HIGH |= ((Ot + Op) >> 32) << 16
661 ld b32 $r7 D[$r5 + #ctx_src_address_low]
662 ld b32 $r8 D[$r5 + #ctx_src_address_high]
663 add b32 $r10 $r12
664 add b32 $r7 $r10
665 adc b32 $r8 0 // carry from the low-word add goes into the high word
666 shl b32 $r8 16
667 or $r8 $r11
668 sub b32 $r6 0x600 // I/O 0x20200 for src or 0x20300 for dst
669 iowr I[$r6 + 0x000] $r7
670 add b32 $r6 0x400 // I/O 0x20600 for src or 0x20700 for dst
671 iowr I[$r6 + 0x000] $r8
672 ret
673
674// Setup to handle a linear surface
675//
676// Nothing to see here.. Sets ADDRESS and PITCH, pretty non-exciting
677//
678cmd_exec_set_surface_linear: // copies address low, address high and pitch from the context to the hw; $r5 and $p2 select src or dst
679 xbit $r6 $flags $p2 // $r6 = 0 for src or 1 for dst
680 add b32 $r6 0x202
681 shl b32 $r6 8 // I/O 0x20200 for src or 0x20300 for dst
682 ld b32 $r7 D[$r5 + #ctx_src_address_low]
683 iowr I[$r6 + 0x000] $r7
684 add b32 $r6 0x400 // I/O 0x20600 for src or 0x20700 for dst
685 ld b32 $r7 D[$r5 + #ctx_src_address_high]
686 shl b32 $r7 16 // the high address bits sit in bits 16 and up of this register
687 iowr I[$r6 + 0x000] $r7
688 add b32 $r6 0x400 // I/O 0x20a00 for src or 0x20b00 for dst
689 ld b32 $r7 D[$r5 + #ctx_src_pitch]
690 iowr I[$r6 + 0x000] $r7
691 ret
692
693// wait for regs to be available for use
694cmd_exec_wait: // spins until bit 0 of I/O 0x20000 reads as zero; all registers are preserved
695 push $r0 // $r0 is borrowed as the address register and must be zero again on return
696 push $r1
697 mov $r0 0x800
698 shl b32 $r0 6 // I/O 0x20000: the register cmd_exec writes the hw command to
699 cmd_exec_wait_poll:
700 iord $r1 I[$r0]
701 and $r1 1
702 bra ne #cmd_exec_wait_poll
703 pop $r1
704 pop $r0
705 ret
706
707cmd_exec_query: // queues the query write requested by EXEC; $r3 holds the EXEC data and $r4/$r5 are clobbered
708 // if QUERY_SHORT not set, write out { -, 0, TIME_LO, TIME_HI }
709 xbit $r4 $r3 13
710 bra ne #query_counter // QUERY_SHORT set: only the counter word is written
711 call #cmd_exec_wait
712 mov $r4 0x80c
713 shl b32 $r4 6 // I/O 0x20300: dst address low
714 ld b32 $r5 D[$r0 + #ctx_query_address_low]
715 add b32 $r5 4 // long part starts 4 bytes past the query address
716 iowr I[$r4 + 0x000] $r5
717 iowr I[$r4 + 0x100] $r0 // I/O 0x20400 (src xcnt) = 0
718 mov $r5 0xc
719 iowr I[$r4 + 0x200] $r5 // I/O 0x20500 (dst xcnt) = 12 bytes
720 add b32 $r4 0x400 // I/O 0x20700: dst address high
721 ld b32 $r5 D[$r0 + #ctx_query_address_high]
722 shl b32 $r5 16
723 iowr I[$r4 + 0x000] $r5
724 add b32 $r4 0x500 // I/O 0x20c00: same register cmd_exec_set_format writes the cpp pair to
725 mov $r5 0x00000b00
726 sethi $r5 0x00010000
727 iowr I[$r4 + 0x000] $r5
728 mov $r5 0x00004040
729 shl b32 $r5 1 // 0x4040 << 1 = 0x8080
730 sethi $r5 0x80800000
731 iowr I[$r4 + 0x100] $r5 // first swizzle word
732 mov $r5 0x00001110
733 sethi $r5 0x13120000
734 iowr I[$r4 + 0x200] $r5 // second swizzle word
735 mov $r5 0x00001514
736 sethi $r5 0x17160000
737 iowr I[$r4 + 0x300] $r5 // third swizzle word
738 mov $r5 0x00002601
739 sethi $r5 0x00010000
740 mov $r4 0x800
741 shl b32 $r4 6 // I/O 0x20000: hw command register
742 iowr I[$r4 + 0x000] $r5
743
744 // write COUNTER
745 query_counter:
746 call #cmd_exec_wait
747 mov $r4 0x80c
748 shl b32 $r4 6 // I/O 0x20300: dst address low
749 ld b32 $r5 D[$r0 + #ctx_query_address_low]
750 iowr I[$r4 + 0x000] $r5
751 iowr I[$r4 + 0x100] $r0 // I/O 0x20400 (src xcnt) = 0
752 mov $r5 0x4
753 iowr I[$r4 + 0x200] $r5 // I/O 0x20500 (dst xcnt) = 4 bytes
754 add b32 $r4 0x400 // I/O 0x20700: dst address high
755 ld b32 $r5 D[$r0 + #ctx_query_address_high]
756 shl b32 $r5 16
757 iowr I[$r4 + 0x000] $r5
758 add b32 $r4 0x500 // I/O 0x20c00
759 mov $r5 0x00000300
760 iowr I[$r4 + 0x000] $r5
761 mov $r5 0x00001110
762 sethi $r5 0x13120000
763 iowr I[$r4 + 0x100] $r5 // first swizzle word
764 ld b32 $r5 D[$r0 + #ctx_query_counter]
765 add b32 $r4 0x500 // I/O 0x21100: the register cmd_exec_set_format loads ctx_swz_const0 into
766 iowr I[$r4 + 0x000] $r5
767 mov $r5 0x00002601
768 sethi $r5 0x00010000
769 mov $r4 0x800
770 shl b32 $r4 6 // I/O 0x20000: hw command register
771 iowr I[$r4 + 0x000] $r5
772 ret
773
774// Execute a copy operation
775//
776// Inputs:
777// $r1: irqh state
778// $r2: hostirq state
779// $r3: data
780// 000002000 QUERY_SHORT
781// 000001000 QUERY
782// 000000100 DST_LINEAR
783// 000000010 SRC_LINEAR
784// 000000001 FORMAT
785// $r4: dispatch table entry
786// Outputs:
787// $r1: irqh state
788// $p1: set on error
789// $r2: hostirq state
790// $r3: data
791cmd_exec: // handler for mthd 0x0300: programs both surfaces, starts the copy and optionally queues a query
792 call #cmd_exec_wait
793
794 // if format requested, call function to calculate it, otherwise
795 // fill in cpp/xcnt for both surfaces as if (cpp == 1)
796 xbit $r15 $r3 0
797 bra e #cmd_exec_no_format
798 call #cmd_exec_set_format
799 mov $r4 0x200 // $r4 accumulates the hw command; bit 9 is set only on the FORMAT path
800 bra #cmd_exec_init_src_surface
801 cmd_exec_no_format:
802 mov $r6 0x810
803 shl b32 $r6 6 // I/O 0x20400
804 mov $r7 1
805 st b32 D[$r0 + #ctx_src_cpp] $r7
806 st b32 D[$r0 + #ctx_dst_cpp] $r7
807 ld b32 $r7 D[$r0 + #ctx_xcnt]
808 iowr I[$r6 + 0x000] $r7 // src xcnt
809 iowr I[$r6 + 0x100] $r7 // dst xcnt
810 clear b32 $r4
811
812 cmd_exec_init_src_surface:
813 bclr $flags $p2 // $p2 = 0 selects the src registers in the surface helpers
814 clear b32 $r5 // ctx offset 0: the helpers read the ctx_src_* fields
815 xbit $r15 $r3 4
816 bra e #src_tiled // SRC_LINEAR clear: surface is tiled
817 call #cmd_exec_set_surface_linear
818 bra #cmd_exec_init_dst_surface
819 src_tiled:
820 call #cmd_exec_set_surface_tiled
821 bset $r4 7 // command bit 7 is set when src is tiled
822
823 cmd_exec_init_dst_surface:
824 bset $flags $p2 // $p2 = 1 selects the dst registers in the surface helpers
825 mov $r5 #ctx_dst_address_high - #ctx_src_address_high
826 xbit $r15 $r3 8
827 bra e #dst_tiled // DST_LINEAR clear: surface is tiled
828 call #cmd_exec_set_surface_linear
829 bra #cmd_exec_kick
830 dst_tiled:
831 call #cmd_exec_set_surface_tiled
832 bset $r4 8 // command bit 8 is set when dst is tiled
833
834 cmd_exec_kick:
835 mov $r5 0x800
836 shl b32 $r5 6 // I/O 0x20000: hw command register
837 ld b32 $r6 D[$r0 + #ctx_ycnt]
838 iowr I[$r5 + 0x100] $r6 // I/O 0x20100 = ycnt
839 mov $r6 0x0041
840 // SRC_TARGET = 1, DST_TARGET = 2
841 sethi $r6 0x44000000
842 or $r4 $r6
843 iowr I[$r5] $r4 // writing the command starts the copy
844
845 // if requested, queue up a QUERY write after the copy has completed
846 xbit $r15 $r3 12
847 bra e #cmd_exec_done
848 call #cmd_exec_query
849
850 cmd_exec_done:
851 ret
852
853// Flush write cache
854//
855// Inputs:
856// $r1: irqh state
857// $r2: hostirq state
858// $r3: data
859// $r4: dispatch table entry
860// Outputs:
861// $r1: irqh state
862// $p1: set on error
863// $r2: hostirq state
864// $r3: data
865cmd_wrcache_flush: // handler for mthd 0x0304: one write to I/O 0x2200
866 mov $r2 0x2200 // NOTE(review): overwrites $r2 and $r3 although the header lists them as outputs; dispatch only reads them when $p1 is set
867 clear b32 $r3
868 sethi $r3 0x10000 // presumably leaves $r3 = 0x00010000 - confirm sethi semantics
869 iowr I[$r2] $r3
870 ret
871
872.align 0x100