diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nva3_copy.fuc')
-rw-r--r-- | drivers/gpu/drm/nouveau/nva3_copy.fuc | 870 |
1 files changed, 870 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc b/drivers/gpu/drm/nouveau/nva3_copy.fuc new file mode 100644 index 000000000000..eaf35f8321ee --- /dev/null +++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc | |||
@@ -0,0 +1,870 @@ | |||
1 | /* fuc microcode for copy engine on nva3- chipsets | ||
2 | * | ||
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
9 | * and/or sell copies of the Software, and to permit persons to whom the | ||
10 | * Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice shall be included in | ||
13 | * all copies or substantial portions of the Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
21 | * OTHER DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: Ben Skeggs | ||
24 | */ | ||
25 | |||
26 | /* To build for nva3:nvc0 | ||
27 | * m4 -DNVA3 nva3_copy.fuc | envyas -a -w -m fuc -V nva3 -o nva3_copy.fuc.h | ||
28 | * | ||
29 | * To build for nvc0- | ||
30 | * m4 -DNVC0 nva3_copy.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_copy.fuc.h | ||
31 | */ | ||
32 | |||
33 | ifdef(`NVA3', | ||
34 | .section nva3_pcopy_data, | ||
35 | .section nvc0_pcopy_data | ||
36 | ) | ||
37 | |||
38 | ctx_object: .b32 0 | ||
39 | ifdef(`NVA3', | ||
40 | ctx_dma: | ||
41 | ctx_dma_query: .b32 0 | ||
42 | ctx_dma_src: .b32 0 | ||
43 | ctx_dma_dst: .b32 0 | ||
44 | ,) | ||
45 | .equ ctx_dma_count 3 | ||
46 | ctx_query_address_high: .b32 0 | ||
47 | ctx_query_address_low: .b32 0 | ||
48 | ctx_query_counter: .b32 0 | ||
49 | ctx_src_address_high: .b32 0 | ||
50 | ctx_src_address_low: .b32 0 | ||
51 | ctx_src_pitch: .b32 0 | ||
52 | ctx_src_tile_mode: .b32 0 | ||
53 | ctx_src_xsize: .b32 0 | ||
54 | ctx_src_ysize: .b32 0 | ||
55 | ctx_src_zsize: .b32 0 | ||
56 | ctx_src_zoff: .b32 0 | ||
57 | ctx_src_xoff: .b32 0 | ||
58 | ctx_src_yoff: .b32 0 | ||
59 | ctx_src_cpp: .b32 0 | ||
60 | ctx_dst_address_high: .b32 0 | ||
61 | ctx_dst_address_low: .b32 0 | ||
62 | ctx_dst_pitch: .b32 0 | ||
63 | ctx_dst_tile_mode: .b32 0 | ||
64 | ctx_dst_xsize: .b32 0 | ||
65 | ctx_dst_ysize: .b32 0 | ||
66 | ctx_dst_zsize: .b32 0 | ||
67 | ctx_dst_zoff: .b32 0 | ||
68 | ctx_dst_xoff: .b32 0 | ||
69 | ctx_dst_yoff: .b32 0 | ||
70 | ctx_dst_cpp: .b32 0 | ||
71 | ctx_format: .b32 0 | ||
72 | ctx_swz_const0: .b32 0 | ||
73 | ctx_swz_const1: .b32 0 | ||
74 | ctx_xcnt: .b32 0 | ||
75 | ctx_ycnt: .b32 0 | ||
76 | .align 256 | ||
77 | |||
78 | dispatch_table: | ||
79 | // mthd 0x0000, NAME | ||
80 | .b16 0x000 1 | ||
81 | .b32 ctx_object ~0xffffffff | ||
82 | // mthd 0x0100, NOP | ||
83 | .b16 0x040 1 | ||
84 | .b32 0x00010000 + cmd_nop ~0xffffffff | ||
85 | // mthd 0x0140, PM_TRIGGER | ||
86 | .b16 0x050 1 | ||
87 | .b32 0x00010000 + cmd_pm_trigger ~0xffffffff | ||
88 | ifdef(`NVA3', ` | ||
89 | // mthd 0x0180-0x018c, DMA_ | ||
90 | .b16 0x060 ctx_dma_count | ||
91 | dispatch_dma: | ||
92 | .b32 0x00010000 + cmd_dma ~0xffffffff | ||
93 | .b32 0x00010000 + cmd_dma ~0xffffffff | ||
94 | .b32 0x00010000 + cmd_dma ~0xffffffff | ||
95 | ',) | ||
96 | // mthd 0x0200-0x0218, SRC_TILE | ||
97 | .b16 0x80 7 | ||
98 | .b32 ctx_src_tile_mode ~0x00000fff | ||
99 | .b32 ctx_src_xsize ~0x0007ffff | ||
100 | .b32 ctx_src_ysize ~0x00001fff | ||
101 | .b32 ctx_src_zsize ~0x000007ff | ||
102 | .b32 ctx_src_zoff ~0x00000fff | ||
103 | .b32 ctx_src_xoff ~0x0007ffff | ||
104 | .b32 ctx_src_yoff ~0x00001fff | ||
105 | // mthd 0x0220-0x0238, DST_TILE | ||
106 | .b16 0x88 7 | ||
107 | .b32 ctx_dst_tile_mode ~0x00000fff | ||
108 | .b32 ctx_dst_xsize ~0x0007ffff | ||
109 | .b32 ctx_dst_ysize ~0x00001fff | ||
110 | .b32 ctx_dst_zsize ~0x000007ff | ||
111 | .b32 ctx_dst_zoff ~0x00000fff | ||
112 | .b32 ctx_dst_xoff ~0x0007ffff | ||
113 | .b32 ctx_dst_yoff ~0x00001fff | ||
114 | // mthd 0x0300-0x0304, EXEC, WRCACHE_FLUSH | ||
115 | .b16 0xc0 2 | ||
116 | .b32 0x00010000 + cmd_exec ~0xffffffff | ||
117 | .b32 0x00010000 + cmd_wrcache_flush ~0xffffffff | ||
118 | // mthd 0x030c-0x0340, various stuff | ||
119 | .b16 0xc3 14 | ||
120 | .b32 ctx_src_address_high ~0x000000ff | ||
121 | .b32 ctx_src_address_low ~0xfffffff0 | ||
122 | .b32 ctx_dst_address_high ~0x000000ff | ||
123 | .b32 ctx_dst_address_low ~0xfffffff0 | ||
124 | .b32 ctx_src_pitch ~0x0007ffff | ||
125 | .b32 ctx_dst_pitch ~0x0007ffff | ||
126 | .b32 ctx_xcnt ~0x0000ffff | ||
127 | .b32 ctx_ycnt ~0x00001fff | ||
128 | .b32 ctx_format ~0x0333ffff | ||
129 | .b32 ctx_swz_const0 ~0xffffffff | ||
130 | .b32 ctx_swz_const1 ~0xffffffff | ||
131 | .b32 ctx_query_address_high ~0x000000ff | ||
132 | .b32 ctx_query_address_low ~0xffffffff | ||
133 | .b32 ctx_query_counter ~0xffffffff | ||
134 | .b16 0x800 0 | ||
135 | |||
136 | ifdef(`NVA3', | ||
137 | .section nva3_pcopy_code, | ||
138 | .section nvc0_pcopy_code | ||
139 | ) | ||
140 | |||
141 | main: | ||
142 | clear b32 $r0 | ||
143 | mov $sp $r0 | ||
144 | |||
145 | // setup i0 handler and route fifo and ctxswitch to it | ||
146 | mov $r1 ih | ||
147 | mov $iv0 $r1 | ||
148 | mov $r1 0x400 | ||
149 | movw $r2 0xfff3 | ||
150 | sethi $r2 0 | ||
151 | iowr I[$r2 + 0x300] $r2 | ||
152 | |||
153 | // enable interrupts | ||
154 | or $r2 0xc | ||
155 | iowr I[$r1] $r2 | ||
156 | bset $flags ie0 | ||
157 | |||
158 | // enable fifo access and context switching | ||
159 | mov $r1 0x1200 | ||
160 | mov $r2 3 | ||
161 | iowr I[$r1] $r2 | ||
162 | |||
163 | // sleep forever, waking for interrupts | ||
164 | bset $flags $p0 | ||
165 | spin: | ||
166 | sleep $p0 | ||
167 | bra spin | ||
168 | |||
169 | // i0 handler | ||
170 | ih: | ||
171 | iord $r1 I[$r0 + 0x200] | ||
172 | |||
173 | and $r2 $r1 0x00000008 | ||
174 | bra e ih_no_chsw | ||
175 | call chsw | ||
176 | ih_no_chsw: | ||
177 | and $r2 $r1 0x00000004 | ||
178 | bra e ih_no_cmd | ||
179 | call dispatch | ||
180 | |||
181 | ih_no_cmd: | ||
182 | and $r1 $r1 0x0000000c | ||
183 | iowr I[$r0 + 0x100] $r1 | ||
184 | iret | ||
185 | |||
186 | // $p1 direction (0 = unload, 1 = load) | ||
187 | // $r3 channel | ||
188 | swctx: | ||
189 | mov $r4 0x7700 | ||
190 | mov $xtargets $r4 | ||
191 | ifdef(`NVA3', ` | ||
192 | // target 7 hardcoded to ctx dma object | ||
193 | mov $xdbase $r0 | ||
194 | ', ` // NVC0 | ||
195 | // read SCRATCH3 to decide if we are PCOPY0 or PCOPY1 | ||
196 | mov $r4 0x2100 | ||
197 | iord $r4 I[$r4 + 0] | ||
198 | and $r4 1 | ||
199 | shl b32 $r4 4 | ||
200 | add b32 $r4 0x30 | ||
201 | |||
202 | // channel is in vram | ||
203 | mov $r15 0x61c | ||
204 | shl b32 $r15 6 | ||
205 | mov $r5 0x114 | ||
206 | iowrs I[$r15] $r5 | ||
207 | |||
208 | // read 16-byte PCOPYn info, containing context pointer, from channel | ||
209 | shl b32 $r5 $r3 4 | ||
210 | add b32 $r5 2 | ||
211 | mov $xdbase $r5 | ||
212 | mov $r5 $sp | ||
213 | // get a chunk of stack space, aligned to 256 byte boundary | ||
214 | sub b32 $r5 0x100 | ||
215 | mov $r6 0xff | ||
216 | not b32 $r6 | ||
217 | and $r5 $r6 | ||
218 | sethi $r5 0x00020000 | ||
219 | xdld $r4 $r5 | ||
220 | xdwait | ||
221 | sethi $r5 0 | ||
222 | |||
223 | // set context pointer, from within channel VM | ||
224 | mov $r14 0 | ||
225 | iowrs I[$r15] $r14 | ||
226 | ld b32 $r4 D[$r5 + 0] | ||
227 | shr b32 $r4 8 | ||
228 | ld b32 $r6 D[$r5 + 4] | ||
229 | shl b32 $r6 24 | ||
230 | or $r4 $r6 | ||
231 | mov $xdbase $r4 | ||
232 | ') | ||
233 | // 256-byte context, at start of data segment | ||
234 | mov b32 $r4 $r0 | ||
235 | sethi $r4 0x60000 | ||
236 | |||
237 | // swap! | ||
238 | bra $p1 swctx_load | ||
239 | xdst $r0 $r4 | ||
240 | bra swctx_done | ||
241 | swctx_load: | ||
242 | xdld $r0 $r4 | ||
243 | swctx_done: | ||
244 | xdwait | ||
245 | ret | ||
246 | |||
247 | chsw: | ||
248 | // read current channel | ||
249 | mov $r2 0x1400 | ||
250 | iord $r3 I[$r2] | ||
251 | |||
252 | // if it's active, unload it and return | ||
253 | xbit $r15 $r3 0x1e | ||
254 | bra e chsw_no_unload | ||
255 | bclr $flags $p1 | ||
256 | call swctx | ||
257 | bclr $r3 0x1e | ||
258 | iowr I[$r2] $r3 | ||
259 | mov $r4 1 | ||
260 | iowr I[$r2 + 0x200] $r4 | ||
261 | ret | ||
262 | |||
263 | // read next channel | ||
264 | chsw_no_unload: | ||
265 | iord $r3 I[$r2 + 0x100] | ||
266 | |||
267 | // is there a channel waiting to be loaded? | ||
268 | xbit $r13 $r3 0x1e | ||
269 | bra e chsw_finish_load | ||
270 | bset $flags $p1 | ||
271 | call swctx | ||
272 | ifdef(`NVA3', | ||
273 | // load dma objects back into TARGET regs | ||
274 | mov $r5 ctx_dma | ||
275 | mov $r6 ctx_dma_count | ||
276 | chsw_load_ctx_dma: | ||
277 | ld b32 $r7 D[$r5 + $r6 * 4] | ||
278 | add b32 $r8 $r6 0x180 | ||
279 | shl b32 $r8 8 | ||
280 | iowr I[$r8] $r7 | ||
281 | sub b32 $r6 1 | ||
282 | bra nc chsw_load_ctx_dma | ||
283 | ,) | ||
284 | |||
285 | chsw_finish_load: | ||
286 | mov $r3 2 | ||
287 | iowr I[$r2 + 0x200] $r3 | ||
288 | ret | ||
289 | |||
290 | dispatch: | ||
291 | // read incoming fifo command | ||
292 | mov $r3 0x1900 | ||
293 | iord $r2 I[$r3 + 0x100] | ||
294 | iord $r3 I[$r3 + 0x000] | ||
295 | and $r4 $r2 0x7ff | ||
296 | // $r2 will be used to store exception data | ||
297 | shl b32 $r2 0x10 | ||
298 | |||
299 | // lookup method in the dispatch table, ILLEGAL_MTHD if not found | ||
300 | mov $r5 dispatch_table | ||
301 | clear b32 $r6 | ||
302 | clear b32 $r7 | ||
303 | dispatch_loop: | ||
304 | ld b16 $r6 D[$r5 + 0] | ||
305 | ld b16 $r7 D[$r5 + 2] | ||
306 | add b32 $r5 4 | ||
307 | cmpu b32 $r4 $r6 | ||
308 | bra c dispatch_illegal_mthd | ||
309 | add b32 $r7 $r6 | ||
310 | cmpu b32 $r4 $r7 | ||
311 | bra c dispatch_valid_mthd | ||
312 | sub b32 $r7 $r6 | ||
313 | shl b32 $r7 3 | ||
314 | add b32 $r5 $r7 | ||
315 | bra dispatch_loop | ||
316 | |||
317 | // ensure no bits set in reserved fields, INVALID_BITFIELD | ||
318 | dispatch_valid_mthd: | ||
319 | sub b32 $r4 $r6 | ||
320 | shl b32 $r4 3 | ||
321 | add b32 $r4 $r5 | ||
322 | ld b32 $r5 D[$r4 + 4] | ||
323 | and $r5 $r3 | ||
324 | cmpu b32 $r5 0 | ||
325 | bra ne dispatch_invalid_bitfield | ||
326 | |||
327 | // depending on dispatch flags: execute method, or save data as state | ||
328 | ld b16 $r5 D[$r4 + 0] | ||
329 | ld b16 $r6 D[$r4 + 2] | ||
330 | cmpu b32 $r6 0 | ||
331 | bra ne dispatch_cmd | ||
332 | st b32 D[$r5] $r3 | ||
333 | bra dispatch_done | ||
334 | dispatch_cmd: | ||
335 | bclr $flags $p1 | ||
336 | call $r5 | ||
337 | bra $p1 dispatch_error | ||
338 | bra dispatch_done | ||
339 | |||
340 | dispatch_invalid_bitfield: | ||
341 | or $r2 2 | ||
342 | dispatch_illegal_mthd: | ||
343 | or $r2 1 | ||
344 | |||
345 | // store exception data in SCRATCH0/SCRATCH1, signal hostirq | ||
346 | dispatch_error: | ||
347 | mov $r4 0x1000 | ||
348 | iowr I[$r4 + 0x000] $r2 | ||
349 | iowr I[$r4 + 0x100] $r3 | ||
350 | mov $r2 0x40 | ||
351 | iowr I[$r0] $r2 | ||
352 | hostirq_wait: | ||
353 | iord $r2 I[$r0 + 0x200] | ||
354 | and $r2 0x40 | ||
355 | cmpu b32 $r2 0 | ||
356 | bra ne hostirq_wait | ||
357 | |||
358 | dispatch_done: | ||
359 | mov $r2 0x1d00 | ||
360 | mov $r3 1 | ||
361 | iowr I[$r2] $r3 | ||
362 | ret | ||
363 | |||
364 | // No-operation | ||
365 | // | ||
366 | // Inputs: | ||
367 | // $r1: irqh state | ||
368 | // $r2: hostirq state | ||
369 | // $r3: data | ||
370 | // $r4: dispatch table entry | ||
371 | // Outputs: | ||
372 | // $r1: irqh state | ||
373 | // $p1: set on error | ||
374 | // $r2: hostirq state | ||
375 | // $r3: data | ||
376 | cmd_nop: | ||
377 | ret | ||
378 | |||
379 | // PM_TRIGGER | ||
380 | // | ||
381 | // Inputs: | ||
382 | // $r1: irqh state | ||
383 | // $r2: hostirq state | ||
384 | // $r3: data | ||
385 | // $r4: dispatch table entry | ||
386 | // Outputs: | ||
387 | // $r1: irqh state | ||
388 | // $p1: set on error | ||
389 | // $r2: hostirq state | ||
390 | // $r3: data | ||
391 | cmd_pm_trigger: | ||
392 | mov $r2 0x2200 | ||
393 | clear b32 $r3 | ||
394 | sethi $r3 0x20000 | ||
395 | iowr I[$r2] $r3 | ||
396 | ret | ||
397 | |||
398 | ifdef(`NVA3', | ||
399 | // SET_DMA_* method handler | ||
400 | // | ||
401 | // Inputs: | ||
402 | // $r1: irqh state | ||
403 | // $r2: hostirq state | ||
404 | // $r3: data | ||
405 | // $r4: dispatch table entry | ||
406 | // Outputs: | ||
407 | // $r1: irqh state | ||
408 | // $p1: set on error | ||
409 | // $r2: hostirq state | ||
410 | // $r3: data | ||
411 | cmd_dma: | ||
412 | sub b32 $r4 dispatch_dma | ||
413 | shr b32 $r4 1 | ||
414 | bset $r3 0x1e | ||
415 | st b32 D[$r4 + ctx_dma] $r3 | ||
416 | add b32 $r4 0x600 | ||
417 | shl b32 $r4 6 | ||
418 | iowr I[$r4] $r3 | ||
419 | ret | ||
420 | ,) | ||
421 | |||
422 | // Calculates the hw swizzle mask and adjusts the surface's xcnt to match | ||
423 | // | ||
424 | cmd_exec_set_format: | ||
425 | // zero out a chunk of the stack to store the swizzle into | ||
426 | add $sp -0x10 | ||
427 | st b32 D[$sp + 0x00] $r0 | ||
428 | st b32 D[$sp + 0x04] $r0 | ||
429 | st b32 D[$sp + 0x08] $r0 | ||
430 | st b32 D[$sp + 0x0c] $r0 | ||
431 | |||
432 | // extract cpp, src_ncomp and dst_ncomp from FORMAT | ||
433 | ld b32 $r4 D[$r0 + ctx_format] | ||
434 | extr $r5 $r4 16:17 | ||
435 | add b32 $r5 1 | ||
436 | extr $r6 $r4 20:21 | ||
437 | add b32 $r6 1 | ||
438 | extr $r7 $r4 24:25 | ||
439 | add b32 $r7 1 | ||
440 | |||
441 | // convert FORMAT swizzle mask to hw swizzle mask | ||
442 | bclr $flags $p2 | ||
443 | clear b32 $r8 | ||
444 | clear b32 $r9 | ||
445 | ncomp_loop: | ||
446 | and $r10 $r4 0xf | ||
447 | shr b32 $r4 4 | ||
448 | clear b32 $r11 | ||
449 | bpc_loop: | ||
450 | cmpu b8 $r10 4 | ||
451 | bra nc cmp_c0 | ||
452 | mulu $r12 $r10 $r5 | ||
453 | add b32 $r12 $r11 | ||
454 | bset $flags $p2 | ||
455 | bra bpc_next | ||
456 | cmp_c0: | ||
457 | bra ne cmp_c1 | ||
458 | mov $r12 0x10 | ||
459 | add b32 $r12 $r11 | ||
460 | bra bpc_next | ||
461 | cmp_c1: | ||
462 | cmpu b8 $r10 6 | ||
463 | bra nc cmp_zero | ||
464 | mov $r12 0x14 | ||
465 | add b32 $r12 $r11 | ||
466 | bra bpc_next | ||
467 | cmp_zero: | ||
468 | mov $r12 0x80 | ||
469 | bpc_next: | ||
470 | st b8 D[$sp + $r8] $r12 | ||
471 | add b32 $r8 1 | ||
472 | add b32 $r11 1 | ||
473 | cmpu b32 $r11 $r5 | ||
474 | bra c bpc_loop | ||
475 | add b32 $r9 1 | ||
476 | cmpu b32 $r9 $r7 | ||
477 | bra c ncomp_loop | ||
478 | |||
479 | // SRC_XCNT = (xcnt * src_cpp), or 0 if no src ref in swz (hw will hang) | ||
480 | mulu $r6 $r5 | ||
481 | st b32 D[$r0 + ctx_src_cpp] $r6 | ||
482 | ld b32 $r8 D[$r0 + ctx_xcnt] | ||
483 | mulu $r6 $r8 | ||
484 | bra $p2 dst_xcnt | ||
485 | clear b32 $r6 | ||
486 | |||
487 | dst_xcnt: | ||
488 | mulu $r7 $r5 | ||
489 | st b32 D[$r0 + ctx_dst_cpp] $r7 | ||
490 | mulu $r7 $r8 | ||
491 | |||
492 | mov $r5 0x810 | ||
493 | shl b32 $r5 6 | ||
494 | iowr I[$r5 + 0x000] $r6 | ||
495 | iowr I[$r5 + 0x100] $r7 | ||
496 | add b32 $r5 0x800 | ||
497 | ld b32 $r6 D[$r0 + ctx_dst_cpp] | ||
498 | sub b32 $r6 1 | ||
499 | shl b32 $r6 8 | ||
500 | ld b32 $r7 D[$r0 + ctx_src_cpp] | ||
501 | sub b32 $r7 1 | ||
502 | or $r6 $r7 | ||
503 | iowr I[$r5 + 0x000] $r6 | ||
504 | add b32 $r5 0x100 | ||
505 | ld b32 $r6 D[$sp + 0x00] | ||
506 | iowr I[$r5 + 0x000] $r6 | ||
507 | ld b32 $r6 D[$sp + 0x04] | ||
508 | iowr I[$r5 + 0x100] $r6 | ||
509 | ld b32 $r6 D[$sp + 0x08] | ||
510 | iowr I[$r5 + 0x200] $r6 | ||
511 | ld b32 $r6 D[$sp + 0x0c] | ||
512 | iowr I[$r5 + 0x300] $r6 | ||
513 | add b32 $r5 0x400 | ||
514 | ld b32 $r6 D[$r0 + ctx_swz_const0] | ||
515 | iowr I[$r5 + 0x000] $r6 | ||
516 | ld b32 $r6 D[$r0 + ctx_swz_const1] | ||
517 | iowr I[$r5 + 0x100] $r6 | ||
518 | add $sp 0x10 | ||
519 | ret | ||
520 | |||
521 | // Setup to handle a tiled surface | ||
522 | // | ||
523 | // Calculates a number of parameters the hardware requires in order | ||
524 | // to correctly handle tiling. | ||
525 | // | ||
526 | // Offset calculation is performed as follows (Tp/Th/Td from TILE_MODE): | ||
527 | // nTx = round_up(w * cpp, 1 << Tp) >> Tp | ||
528 | // nTy = round_up(h, 1 << Th) >> Th | ||
529 | // Txo = (x * cpp) & ((1 << Tp) - 1) | ||
530 | // Tx = (x * cpp) >> Tp | ||
531 | // Tyo = y & ((1 << Th) - 1) | ||
532 | // Ty = y >> Th | ||
533 | // Tzo = z & ((1 << Td) - 1) | ||
534 | // Tz = z >> Td | ||
535 | // | ||
536 | // off = (Tzo << Tp << Th) + (Tyo << Tp) + Txo | ||
537 | // off += ((Tz * nTy * nTx)) + (Ty * nTx) + Tx) << Td << Th << Tp; | ||
538 | // | ||
539 | // Inputs: | ||
540 | // $r4: hw command (0x104800) | ||
541 | // $r5: ctx offset adjustment for src/dst selection | ||
542 | // $p2: set if dst surface | ||
543 | // | ||
544 | cmd_exec_set_surface_tiled: | ||
545 | // translate TILE_MODE into Tp, Th, Td shift values | ||
546 | ld b32 $r7 D[$r5 + ctx_src_tile_mode] | ||
547 | extr $r9 $r7 8:11 | ||
548 | extr $r8 $r7 4:7 | ||
549 | ifdef(`NVA3', | ||
550 | add b32 $r8 2 | ||
551 | , | ||
552 | add b32 $r8 3 | ||
553 | ) | ||
554 | extr $r7 $r7 0:3 | ||
555 | cmp b32 $r7 0xe | ||
556 | bra ne xtile64 | ||
557 | mov $r7 4 | ||
558 | bra xtileok | ||
559 | xtile64: | ||
560 | xbit $r7 $flags $p2 | ||
561 | add b32 $r7 17 | ||
562 | bset $r4 $r7 | ||
563 | mov $r7 6 | ||
564 | xtileok: | ||
565 | |||
566 | // Op = (x * cpp) & ((1 << Tp) - 1) | ||
567 | // Tx = (x * cpp) >> Tp | ||
568 | ld b32 $r10 D[$r5 + ctx_src_xoff] | ||
569 | ld b32 $r11 D[$r5 + ctx_src_cpp] | ||
570 | mulu $r10 $r11 | ||
571 | mov $r11 1 | ||
572 | shl b32 $r11 $r7 | ||
573 | sub b32 $r11 1 | ||
574 | and $r12 $r10 $r11 | ||
575 | shr b32 $r10 $r7 | ||
576 | |||
577 | // Tyo = y & ((1 << Th) - 1) | ||
578 | // Ty = y >> Th | ||
579 | ld b32 $r13 D[$r5 + ctx_src_yoff] | ||
580 | mov $r14 1 | ||
581 | shl b32 $r14 $r8 | ||
582 | sub b32 $r14 1 | ||
583 | and $r11 $r13 $r14 | ||
584 | shr b32 $r13 $r8 | ||
585 | |||
586 | // YTILE = ((1 << Th) << 12) | ((1 << Th) - Tyo) | ||
587 | add b32 $r14 1 | ||
588 | shl b32 $r15 $r14 12 | ||
589 | sub b32 $r14 $r11 | ||
590 | or $r15 $r14 | ||
591 | xbit $r6 $flags $p2 | ||
592 | add b32 $r6 0x208 | ||
593 | shl b32 $r6 8 | ||
594 | iowr I[$r6 + 0x000] $r15 | ||
595 | |||
596 | // Op += Tyo << Tp | ||
597 | shl b32 $r11 $r7 | ||
598 | add b32 $r12 $r11 | ||
599 | |||
600 | // nTx = ((w * cpp) + ((1 << Tp) - 1) >> Tp) | ||
601 | ld b32 $r15 D[$r5 + ctx_src_xsize] | ||
602 | ld b32 $r11 D[$r5 + ctx_src_cpp] | ||
603 | mulu $r15 $r11 | ||
604 | mov $r11 1 | ||
605 | shl b32 $r11 $r7 | ||
606 | sub b32 $r11 1 | ||
607 | add b32 $r15 $r11 | ||
608 | shr b32 $r15 $r7 | ||
609 | push $r15 | ||
610 | |||
611 | // nTy = (h + ((1 << Th) - 1)) >> Th | ||
612 | ld b32 $r15 D[$r5 + ctx_src_ysize] | ||
613 | mov $r11 1 | ||
614 | shl b32 $r11 $r8 | ||
615 | sub b32 $r11 1 | ||
616 | add b32 $r15 $r11 | ||
617 | shr b32 $r15 $r8 | ||
618 | push $r15 | ||
619 | |||
620 | // Tys = Tp + Th | ||
621 | // CFG_YZ_TILE_SIZE = ((1 << Th) >> 2) << Td | ||
622 | add b32 $r7 $r8 | ||
623 | sub b32 $r8 2 | ||
624 | mov $r11 1 | ||
625 | shl b32 $r11 $r8 | ||
626 | shl b32 $r11 $r9 | ||
627 | |||
628 | // Tzo = z & ((1 << Td) - 1) | ||
629 | // Tz = z >> Td | ||
630 | // Op += Tzo << Tys | ||
631 | // Ts = Tys + Td | ||
632 | ld b32 $r8 D[$r5 + ctx_src_zoff] | ||
633 | mov $r14 1 | ||
634 | shl b32 $r14 $r9 | ||
635 | sub b32 $r14 1 | ||
636 | and $r15 $r8 $r14 | ||
637 | shl b32 $r15 $r7 | ||
638 | add b32 $r12 $r15 | ||
639 | add b32 $r7 $r9 | ||
640 | shr b32 $r8 $r9 | ||
641 | |||
642 | // Ot = ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Ts | ||
643 | pop $r15 | ||
644 | pop $r9 | ||
645 | mulu $r13 $r9 | ||
646 | add b32 $r10 $r13 | ||
647 | mulu $r8 $r9 | ||
648 | mulu $r8 $r15 | ||
649 | add b32 $r10 $r8 | ||
650 | shl b32 $r10 $r7 | ||
651 | |||
652 | // PITCH = (nTx - 1) << Ts | ||
653 | sub b32 $r9 1 | ||
654 | shl b32 $r9 $r7 | ||
655 | iowr I[$r6 + 0x200] $r9 | ||
656 | |||
657 | // SRC_ADDRESS_LOW = (Ot + Op) & 0xffffffff | ||
658 | // CFG_ADDRESS_HIGH |= ((Ot + Op) >> 32) << 16 | ||
659 | ld b32 $r7 D[$r5 + ctx_src_address_low] | ||
660 | ld b32 $r8 D[$r5 + ctx_src_address_high] | ||
661 | add b32 $r10 $r12 | ||
662 | add b32 $r7 $r10 | ||
663 | adc b32 $r8 0 | ||
664 | shl b32 $r8 16 | ||
665 | or $r8 $r11 | ||
666 | sub b32 $r6 0x600 | ||
667 | iowr I[$r6 + 0x000] $r7 | ||
668 | add b32 $r6 0x400 | ||
669 | iowr I[$r6 + 0x000] $r8 | ||
670 | ret | ||
671 | |||
672 | // Setup to handle a linear surface | ||
673 | // | ||
674 | // Nothing to see here.. Sets ADDRESS and PITCH, pretty non-exciting | ||
675 | // | ||
676 | cmd_exec_set_surface_linear: | ||
677 | xbit $r6 $flags $p2 | ||
678 | add b32 $r6 0x202 | ||
679 | shl b32 $r6 8 | ||
680 | ld b32 $r7 D[$r5 + ctx_src_address_low] | ||
681 | iowr I[$r6 + 0x000] $r7 | ||
682 | add b32 $r6 0x400 | ||
683 | ld b32 $r7 D[$r5 + ctx_src_address_high] | ||
684 | shl b32 $r7 16 | ||
685 | iowr I[$r6 + 0x000] $r7 | ||
686 | add b32 $r6 0x400 | ||
687 | ld b32 $r7 D[$r5 + ctx_src_pitch] | ||
688 | iowr I[$r6 + 0x000] $r7 | ||
689 | ret | ||
690 | |||
691 | // wait for regs to be available for use | ||
692 | cmd_exec_wait: | ||
693 | push $r0 | ||
694 | push $r1 | ||
695 | mov $r0 0x800 | ||
696 | shl b32 $r0 6 | ||
697 | loop: | ||
698 | iord $r1 I[$r0] | ||
699 | and $r1 1 | ||
700 | bra ne loop | ||
701 | pop $r1 | ||
702 | pop $r0 | ||
703 | ret | ||
704 | |||
705 | cmd_exec_query: | ||
706 | // if QUERY_SHORT not set, write out { -, 0, TIME_LO, TIME_HI } | ||
707 | xbit $r4 $r3 13 | ||
708 | bra ne query_counter | ||
709 | call cmd_exec_wait | ||
710 | mov $r4 0x80c | ||
711 | shl b32 $r4 6 | ||
712 | ld b32 $r5 D[$r0 + ctx_query_address_low] | ||
713 | add b32 $r5 4 | ||
714 | iowr I[$r4 + 0x000] $r5 | ||
715 | iowr I[$r4 + 0x100] $r0 | ||
716 | mov $r5 0xc | ||
717 | iowr I[$r4 + 0x200] $r5 | ||
718 | add b32 $r4 0x400 | ||
719 | ld b32 $r5 D[$r0 + ctx_query_address_high] | ||
720 | shl b32 $r5 16 | ||
721 | iowr I[$r4 + 0x000] $r5 | ||
722 | add b32 $r4 0x500 | ||
723 | mov $r5 0x00000b00 | ||
724 | sethi $r5 0x00010000 | ||
725 | iowr I[$r4 + 0x000] $r5 | ||
726 | mov $r5 0x00004040 | ||
727 | shl b32 $r5 1 | ||
728 | sethi $r5 0x80800000 | ||
729 | iowr I[$r4 + 0x100] $r5 | ||
730 | mov $r5 0x00001110 | ||
731 | sethi $r5 0x13120000 | ||
732 | iowr I[$r4 + 0x200] $r5 | ||
733 | mov $r5 0x00001514 | ||
734 | sethi $r5 0x17160000 | ||
735 | iowr I[$r4 + 0x300] $r5 | ||
736 | mov $r5 0x00002601 | ||
737 | sethi $r5 0x00010000 | ||
738 | mov $r4 0x800 | ||
739 | shl b32 $r4 6 | ||
740 | iowr I[$r4 + 0x000] $r5 | ||
741 | |||
742 | // write COUNTER | ||
743 | query_counter: | ||
744 | call cmd_exec_wait | ||
745 | mov $r4 0x80c | ||
746 | shl b32 $r4 6 | ||
747 | ld b32 $r5 D[$r0 + ctx_query_address_low] | ||
748 | iowr I[$r4 + 0x000] $r5 | ||
749 | iowr I[$r4 + 0x100] $r0 | ||
750 | mov $r5 0x4 | ||
751 | iowr I[$r4 + 0x200] $r5 | ||
752 | add b32 $r4 0x400 | ||
753 | ld b32 $r5 D[$r0 + ctx_query_address_high] | ||
754 | shl b32 $r5 16 | ||
755 | iowr I[$r4 + 0x000] $r5 | ||
756 | add b32 $r4 0x500 | ||
757 | mov $r5 0x00000300 | ||
758 | iowr I[$r4 + 0x000] $r5 | ||
759 | mov $r5 0x00001110 | ||
760 | sethi $r5 0x13120000 | ||
761 | iowr I[$r4 + 0x100] $r5 | ||
762 | ld b32 $r5 D[$r0 + ctx_query_counter] | ||
763 | add b32 $r4 0x500 | ||
764 | iowr I[$r4 + 0x000] $r5 | ||
765 | mov $r5 0x00002601 | ||
766 | sethi $r5 0x00010000 | ||
767 | mov $r4 0x800 | ||
768 | shl b32 $r4 6 | ||
769 | iowr I[$r4 + 0x000] $r5 | ||
770 | ret | ||
771 | |||
772 | // Execute a copy operation | ||
773 | // | ||
774 | // Inputs: | ||
775 | // $r1: irqh state | ||
776 | // $r2: hostirq state | ||
777 | // $r3: data | ||
778 | // 000002000 QUERY_SHORT | ||
779 | // 000001000 QUERY | ||
780 | // 000000100 DST_LINEAR | ||
781 | // 000000010 SRC_LINEAR | ||
782 | // 000000001 FORMAT | ||
783 | // $r4: dispatch table entry | ||
784 | // Outputs: | ||
785 | // $r1: irqh state | ||
786 | // $p1: set on error | ||
787 | // $r2: hostirq state | ||
788 | // $r3: data | ||
789 | cmd_exec: | ||
790 | call cmd_exec_wait | ||
791 | |||
792 | // if format requested, call function to calculate it, otherwise | ||
793 | // fill in cpp/xcnt for both surfaces as if (cpp == 1) | ||
794 | xbit $r15 $r3 0 | ||
795 | bra e cmd_exec_no_format | ||
796 | call cmd_exec_set_format | ||
797 | mov $r4 0x200 | ||
798 | bra cmd_exec_init_src_surface | ||
799 | cmd_exec_no_format: | ||
800 | mov $r6 0x810 | ||
801 | shl b32 $r6 6 | ||
802 | mov $r7 1 | ||
803 | st b32 D[$r0 + ctx_src_cpp] $r7 | ||
804 | st b32 D[$r0 + ctx_dst_cpp] $r7 | ||
805 | ld b32 $r7 D[$r0 + ctx_xcnt] | ||
806 | iowr I[$r6 + 0x000] $r7 | ||
807 | iowr I[$r6 + 0x100] $r7 | ||
808 | clear b32 $r4 | ||
809 | |||
810 | cmd_exec_init_src_surface: | ||
811 | bclr $flags $p2 | ||
812 | clear b32 $r5 | ||
813 | xbit $r15 $r3 4 | ||
814 | bra e src_tiled | ||
815 | call cmd_exec_set_surface_linear | ||
816 | bra cmd_exec_init_dst_surface | ||
817 | src_tiled: | ||
818 | call cmd_exec_set_surface_tiled | ||
819 | bset $r4 7 | ||
820 | |||
821 | cmd_exec_init_dst_surface: | ||
822 | bset $flags $p2 | ||
823 | mov $r5 ctx_dst_address_high - ctx_src_address_high | ||
824 | xbit $r15 $r3 8 | ||
825 | bra e dst_tiled | ||
826 | call cmd_exec_set_surface_linear | ||
827 | bra cmd_exec_kick | ||
828 | dst_tiled: | ||
829 | call cmd_exec_set_surface_tiled | ||
830 | bset $r4 8 | ||
831 | |||
832 | cmd_exec_kick: | ||
833 | mov $r5 0x800 | ||
834 | shl b32 $r5 6 | ||
835 | ld b32 $r6 D[$r0 + ctx_ycnt] | ||
836 | iowr I[$r5 + 0x100] $r6 | ||
837 | mov $r6 0x0041 | ||
838 | // SRC_TARGET = 1, DST_TARGET = 2 | ||
839 | sethi $r6 0x44000000 | ||
840 | or $r4 $r6 | ||
841 | iowr I[$r5] $r4 | ||
842 | |||
843 | // if requested, queue up a QUERY write after the copy has completed | ||
844 | xbit $r15 $r3 12 | ||
845 | bra e cmd_exec_done | ||
846 | call cmd_exec_query | ||
847 | |||
848 | cmd_exec_done: | ||
849 | ret | ||
850 | |||
851 | // Flush write cache | ||
852 | // | ||
853 | // Inputs: | ||
854 | // $r1: irqh state | ||
855 | // $r2: hostirq state | ||
856 | // $r3: data | ||
857 | // $r4: dispatch table entry | ||
858 | // Outputs: | ||
859 | // $r1: irqh state | ||
860 | // $p1: set on error | ||
861 | // $r2: hostirq state | ||
862 | // $r3: data | ||
863 | cmd_wrcache_flush: | ||
864 | mov $r2 0x2200 | ||
865 | clear b32 $r3 | ||
866 | sethi $r3 0x10000 | ||
867 | iowr I[$r2] $r3 | ||
868 | ret | ||
869 | |||
870 | .align 0x100 | ||