diff options
author | Ben Skeggs <bskeggs@redhat.com> | 2013-06-04 20:28:12 -0400 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2013-06-30 23:50:50 -0400 |
commit | e99716f13d3a499f95a17e5442ef39270e4fc38b (patch) | |
tree | 57f3f15fa2bf77faabadc3e37fee8967378e840e /drivers/gpu/drm/nouveau | |
parent | 05f9a5bc58381f58095d8789e1c2d4e18758c2bc (diff) |
drm/gr/nvc0-: merge nvc0/nve0 ucode, and use cpp instead of m4
No code changes, proven by envyas producing identical binaries.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/nouveau')
14 files changed, 1270 insertions, 2587 deletions
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/com.fuc index e6b228844a32..da18885c559c 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nvc0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/com.fuc | |||
@@ -23,42 +23,7 @@ | |||
23 | * Authors: Ben Skeggs | 23 | * Authors: Ben Skeggs |
24 | */ | 24 | */ |
25 | 25 | ||
26 | define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)') | 26 | #ifdef INCLUDE_CODE |
27 | define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))') | ||
28 | |||
29 | ifdef(`include_code', ` | ||
30 | // Error codes | ||
31 | define(`E_BAD_COMMAND', 0x01) | ||
32 | define(`E_CMD_OVERFLOW', 0x02) | ||
33 | |||
34 | // Util macros to help with debugging ucode hangs etc | ||
35 | define(`T_WAIT', 0) | ||
36 | define(`T_MMCTX', 1) | ||
37 | define(`T_STRWAIT', 2) | ||
38 | define(`T_STRINIT', 3) | ||
39 | define(`T_AUTO', 4) | ||
40 | define(`T_CHAN', 5) | ||
41 | define(`T_LOAD', 6) | ||
42 | define(`T_SAVE', 7) | ||
43 | define(`T_LCHAN', 8) | ||
44 | define(`T_LCTXH', 9) | ||
45 | |||
46 | define(`trace_set', ` | ||
47 | mov $r8 0x83c | ||
48 | shl b32 $r8 6 | ||
49 | clear b32 $r9 | ||
50 | bset $r9 $1 | ||
51 | iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7] | ||
52 | ') | ||
53 | |||
54 | define(`trace_clr', ` | ||
55 | mov $r8 0x85c | ||
56 | shl b32 $r8 6 | ||
57 | clear b32 $r9 | ||
58 | bset $r9 $1 | ||
59 | iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7] | ||
60 | ') | ||
61 | |||
62 | // queue_put - add request to queue | 27 | // queue_put - add request to queue |
63 | // | 28 | // |
64 | // In : $r13 queue pointer | 29 | // In : $r13 queue pointer |
@@ -178,27 +143,41 @@ watchdog_clear: | |||
178 | iowr I[$r8 + 0x000] $r0 | 143 | iowr I[$r8 + 0x000] $r0 |
179 | ret | 144 | ret |
180 | 145 | ||
181 | // wait_done{z,o} - wait on FUC_DONE bit to become clear/set | 146 | // wait_donez - wait on FUC_DONE bit to become clear |
147 | // | ||
148 | // In : $r10 bit to wait on | ||
149 | // | ||
150 | wait_donez: | ||
151 | trace_set(T_WAIT); | ||
152 | mov $r8 0x818 | ||
153 | shl b32 $r8 6 | ||
154 | iowr I[$r8 + 0x000] $r10 | ||
155 | wait_donez_ne: | ||
156 | mov $r8 0x400 | ||
157 | shl b32 $r8 6 | ||
158 | iord $r8 I[$r8 + 0x000] | ||
159 | xbit $r8 $r8 $r10 | ||
160 | bra ne #wait_donez_ne | ||
161 | trace_clr(T_WAIT) | ||
162 | ret | ||
163 | |||
164 | // wait_doneo - wait on FUC_DONE bit to become set | ||
182 | // | 165 | // |
183 | // In : $r10 bit to wait on | 166 | // In : $r10 bit to wait on |
184 | // | 167 | // |
185 | define(`wait_done', ` | 168 | wait_doneo: |
186 | $1: | ||
187 | trace_set(T_WAIT); | 169 | trace_set(T_WAIT); |
188 | mov $r8 0x818 | 170 | mov $r8 0x818 |
189 | shl b32 $r8 6 | 171 | shl b32 $r8 6 |
190 | iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit | 172 | iowr I[$r8 + 0x000] $r10 |
191 | wait_done_$1: | 173 | wait_doneo_e: |
192 | mov $r8 0x400 | 174 | mov $r8 0x400 |
193 | shl b32 $r8 6 | 175 | shl b32 $r8 6 |
194 | iord $r8 I[$r8 + 0x000] // DONE | 176 | iord $r8 I[$r8 + 0x000] |
195 | xbit $r8 $r8 $r10 | 177 | xbit $r8 $r8 $r10 |
196 | bra $2 #wait_done_$1 | 178 | bra e #wait_doneo_e |
197 | trace_clr(T_WAIT) | 179 | trace_clr(T_WAIT) |
198 | ret | 180 | ret |
199 | ') | ||
200 | wait_done(wait_donez, ne) | ||
201 | wait_done(wait_doneo, e) | ||
202 | 181 | ||
203 | // mmctx_size - determine size of a mmio list transfer | 182 | // mmctx_size - determine size of a mmio list transfer |
204 | // | 183 | // |
@@ -397,4 +376,4 @@ strand_ctx_init: | |||
397 | sub b32 $r15 $r14 $r15 | 376 | sub b32 $r15 $r14 $r15 |
398 | trace_clr(T_STRINIT) | 377 | trace_clr(T_STRINIT) |
399 | ret | 378 | ret |
400 | ') | 379 | #endif |
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpc.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpc.fuc new file mode 100644 index 000000000000..4770e8c99432 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpc.fuc | |||
@@ -0,0 +1,369 @@ | |||
1 | /* fuc microcode for nvc0 PGRAPH/GPC | ||
2 | * | ||
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
9 | * and/or sell copies of the Software, and to permit persons to whom the | ||
10 | * Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice shall be included in | ||
13 | * all copies or substantial portions of the Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
21 | * OTHER DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: Ben Skeggs | ||
24 | */ | ||
25 | |||
26 | /* TODO | ||
27 | * - bracket certain functions with scratch writes, useful for debugging | ||
28 | * - watchdog timer around ctx operations | ||
29 | */ | ||
30 | |||
31 | #ifdef INCLUDE_DATA | ||
32 | gpc_id: .b32 0 | ||
33 | gpc_mmio_list_head: .b32 0 | ||
34 | gpc_mmio_list_tail: .b32 0 | ||
35 | |||
36 | tpc_count: .b32 0 | ||
37 | tpc_mask: .b32 0 | ||
38 | tpc_mmio_list_head: .b32 0 | ||
39 | tpc_mmio_list_tail: .b32 0 | ||
40 | |||
41 | cmd_queue: queue_init | ||
42 | #endif | ||
43 | |||
44 | #ifdef INCLUDE_CODE | ||
45 | // reports an exception to the host | ||
46 | // | ||
47 | // In: $r15 error code (see nvc0.fuc) | ||
48 | // | ||
49 | error: | ||
50 | push $r14 | ||
51 | mov $r14 -0x67ec // 0x9814 | ||
52 | sethi $r14 0x400000 | ||
53 | call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code | ||
54 | add b32 $r14 0x41c | ||
55 | mov $r15 1 | ||
56 | call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET | ||
57 | pop $r14 | ||
58 | ret | ||
59 | |||
60 | // GPC fuc initialisation, executed by triggering ucode start, will | ||
61 | // fall through to main loop after completion. | ||
62 | // | ||
63 | // Input: | ||
64 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
65 | // CC_SCRATCH[1]: context base | ||
66 | // | ||
67 | // Output: | ||
68 | // CC_SCRATCH[0]: | ||
69 | // 31:31: set to signal completion | ||
70 | // CC_SCRATCH[1]: | ||
71 | // 31:0: GPC context size | ||
72 | // | ||
73 | init: | ||
74 | clear b32 $r0 | ||
75 | mov $sp $r0 | ||
76 | |||
77 | // enable fifo access | ||
78 | mov $r1 0x1200 | ||
79 | mov $r2 2 | ||
80 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
81 | |||
82 | // setup i0 handler, and route all interrupts to it | ||
83 | mov $r1 #ih | ||
84 | mov $iv0 $r1 | ||
85 | mov $r1 0x400 | ||
86 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
87 | |||
88 | // enable fifo interrupt | ||
89 | mov $r2 4 | ||
90 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
91 | |||
92 | // enable interrupts | ||
93 | bset $flags ie0 | ||
94 | |||
95 | // figure out which GPC we are, and how many TPCs we have | ||
96 | mov $r1 0x608 | ||
97 | shl b32 $r1 6 | ||
98 | iord $r2 I[$r1 + 0x000] // UNITS | ||
99 | mov $r3 1 | ||
100 | and $r2 0x1f | ||
101 | shl b32 $r3 $r2 | ||
102 | sub b32 $r3 1 | ||
103 | st b32 D[$r0 + #tpc_count] $r2 | ||
104 | st b32 D[$r0 + #tpc_mask] $r3 | ||
105 | add b32 $r1 0x400 | ||
106 | iord $r2 I[$r1 + 0x000] // MYINDEX | ||
107 | st b32 D[$r0 + #gpc_id] $r2 | ||
108 | |||
109 | // find context data for this chipset | ||
110 | mov $r2 0x800 | ||
111 | shl b32 $r2 6 | ||
112 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
113 | mov $r1 #chipsets - 12 | ||
114 | init_find_chipset: | ||
115 | add b32 $r1 12 | ||
116 | ld b32 $r3 D[$r1 + 0x00] | ||
117 | cmpu b32 $r3 $r2 | ||
118 | bra e #init_context | ||
119 | cmpu b32 $r3 0 | ||
120 | bra ne #init_find_chipset | ||
121 | // unknown chipset | ||
122 | ret | ||
123 | |||
124 | // initialise context base, and size tracking | ||
125 | init_context: | ||
126 | mov $r2 0x800 | ||
127 | shl b32 $r2 6 | ||
128 | iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base | ||
129 | clear b32 $r3 // track GPC context size here | ||
130 | |||
131 | // set mmctx base addresses now so we don't have to do it later, | ||
132 | // they don't currently ever change | ||
133 | mov $r4 0x700 | ||
134 | shl b32 $r4 6 | ||
135 | shr b32 $r5 $r2 8 | ||
136 | iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE | ||
137 | iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE | ||
138 | |||
139 | // calculate GPC mmio context size, store the chipset-specific | ||
140 | // mmio list pointers somewhere we can get at them later without | ||
141 | // re-parsing the chipset list | ||
142 | clear b32 $r14 | ||
143 | clear b32 $r15 | ||
144 | ld b16 $r14 D[$r1 + 4] | ||
145 | ld b16 $r15 D[$r1 + 6] | ||
146 | st b16 D[$r0 + #gpc_mmio_list_head] $r14 | ||
147 | st b16 D[$r0 + #gpc_mmio_list_tail] $r15 | ||
148 | call #mmctx_size | ||
149 | add b32 $r2 $r15 | ||
150 | add b32 $r3 $r15 | ||
151 | |||
152 | // calculate per-TPC mmio context size, store the list pointers | ||
153 | ld b16 $r14 D[$r1 + 8] | ||
154 | ld b16 $r15 D[$r1 + 10] | ||
155 | st b16 D[$r0 + #tpc_mmio_list_head] $r14 | ||
156 | st b16 D[$r0 + #tpc_mmio_list_tail] $r15 | ||
157 | call #mmctx_size | ||
158 | ld b32 $r14 D[$r0 + #tpc_count] | ||
159 | mulu $r14 $r15 | ||
160 | add b32 $r2 $r14 | ||
161 | add b32 $r3 $r14 | ||
162 | |||
163 | // round up base/size to 256 byte boundary (for strand SWBASE) | ||
164 | add b32 $r4 0x1300 | ||
165 | shr b32 $r3 2 | ||
166 | iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? | ||
167 | shr b32 $r2 8 | ||
168 | shr b32 $r3 6 | ||
169 | add b32 $r2 1 | ||
170 | add b32 $r3 1 | ||
171 | shl b32 $r2 8 | ||
172 | shl b32 $r3 8 | ||
173 | |||
174 | // calculate size of strand context data | ||
175 | mov b32 $r15 $r2 | ||
176 | call #strand_ctx_init | ||
177 | add b32 $r3 $r15 | ||
178 | |||
179 | // save context size, and tell HUB we're done | ||
180 | mov $r1 0x800 | ||
181 | shl b32 $r1 6 | ||
182 | iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size | ||
183 | add b32 $r1 0x800 | ||
184 | clear b32 $r2 | ||
185 | bset $r2 31 | ||
186 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
187 | |||
188 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
189 | // handler, pulls a command from the queue and executes its handler | ||
190 | // | ||
191 | main: | ||
192 | bset $flags $p0 | ||
193 | sleep $p0 | ||
194 | mov $r13 #cmd_queue | ||
195 | call #queue_get | ||
196 | bra $p1 #main | ||
197 | |||
198 | // 0x0000-0x0003 are all context transfers | ||
199 | cmpu b32 $r14 0x04 | ||
200 | bra nc #main_not_ctx_xfer | ||
201 | // fetch $flags and mask off $p1/$p2 | ||
202 | mov $r1 $flags | ||
203 | mov $r2 0x0006 | ||
204 | not b32 $r2 | ||
205 | and $r1 $r2 | ||
206 | // set $p1/$p2 according to transfer type | ||
207 | shl b32 $r14 1 | ||
208 | or $r1 $r14 | ||
209 | mov $flags $r1 | ||
210 | // transfer context data | ||
211 | call #ctx_xfer | ||
212 | bra #main | ||
213 | |||
214 | main_not_ctx_xfer: | ||
215 | shl b32 $r15 $r14 16 | ||
216 | or $r15 E_BAD_COMMAND | ||
217 | call #error | ||
218 | bra #main | ||
219 | |||
220 | // interrupt handler | ||
221 | ih: | ||
222 | push $r8 | ||
223 | mov $r8 $flags | ||
224 | push $r8 | ||
225 | push $r9 | ||
226 | push $r10 | ||
227 | push $r11 | ||
228 | push $r13 | ||
229 | push $r14 | ||
230 | push $r15 | ||
231 | |||
232 | // incoming fifo command? | ||
233 | iord $r10 I[$r0 + 0x200] // INTR | ||
234 | and $r11 $r10 0x00000004 | ||
235 | bra e #ih_no_fifo | ||
236 | // queue incoming fifo command for later processing | ||
237 | mov $r11 0x1900 | ||
238 | mov $r13 #cmd_queue | ||
239 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
240 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
241 | call #queue_put | ||
242 | add b32 $r11 0x400 | ||
243 | mov $r14 1 | ||
244 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
245 | |||
246 | // ack, and wake up main() | ||
247 | ih_no_fifo: | ||
248 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
249 | |||
250 | pop $r15 | ||
251 | pop $r14 | ||
252 | pop $r13 | ||
253 | pop $r11 | ||
254 | pop $r10 | ||
255 | pop $r9 | ||
256 | pop $r8 | ||
257 | mov $flags $r8 | ||
258 | pop $r8 | ||
259 | bclr $flags $p0 | ||
260 | iret | ||
261 | |||
262 | // Set this GPC's bit in HUB_BAR, used to signal completion of various | ||
263 | // activities to the HUB fuc | ||
264 | // | ||
265 | hub_barrier_done: | ||
266 | mov $r15 1 | ||
267 | ld b32 $r14 D[$r0 + #gpc_id] | ||
268 | shl b32 $r15 $r14 | ||
269 | mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET | ||
270 | sethi $r14 0x400000 | ||
271 | call #nv_wr32 | ||
272 | ret | ||
273 | |||
274 | // Disables various things, waits a bit, and re-enables them.. | ||
275 | // | ||
276 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
277 | // good description for the bits we turn off? Anyways, without this, | ||
278 | // funny things happen. | ||
279 | // | ||
280 | ctx_redswitch: | ||
281 | mov $r14 0x614 | ||
282 | shl b32 $r14 6 | ||
283 | mov $r15 0x020 | ||
284 | iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER | ||
285 | mov $r15 8 | ||
286 | ctx_redswitch_delay: | ||
287 | sub b32 $r15 1 | ||
288 | bra ne #ctx_redswitch_delay | ||
289 | mov $r15 0xa20 | ||
290 | iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER | ||
291 | ret | ||
292 | |||
293 | // Transfer GPC context data between GPU and storage area | ||
294 | // | ||
295 | // In: $r15 context base address | ||
296 | // $p1 clear on save, set on load | ||
297 | // $p2 set if opposite direction done/will be done, so: | ||
298 | // on save it means: "a load will follow this save" | ||
299 | // on load it means: "a save preceded this load" | ||
300 | // | ||
301 | ctx_xfer: | ||
302 | // set context base address | ||
303 | mov $r1 0xa04 | ||
304 | shl b32 $r1 6 | ||
305 | iowr I[$r1 + 0x000] $r15// MEM_BASE | ||
306 | bra not $p1 #ctx_xfer_not_load | ||
307 | call #ctx_redswitch | ||
308 | ctx_xfer_not_load: | ||
309 | |||
310 | // strands | ||
311 | mov $r1 0x4afc | ||
312 | sethi $r1 0x20000 | ||
313 | mov $r2 0xc | ||
314 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
315 | call #strand_wait | ||
316 | mov $r2 0x47fc | ||
317 | sethi $r2 0x20000 | ||
318 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
319 | xbit $r2 $flags $p1 | ||
320 | add b32 $r2 3 | ||
321 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
322 | |||
323 | // mmio context | ||
324 | xbit $r10 $flags $p1 // direction | ||
325 | or $r10 2 // first | ||
326 | mov $r11 0x0000 | ||
327 | sethi $r11 0x500000 | ||
328 | ld b32 $r12 D[$r0 + #gpc_id] | ||
329 | shl b32 $r12 15 | ||
330 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn | ||
331 | ld b32 $r12 D[$r0 + #gpc_mmio_list_head] | ||
332 | ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] | ||
333 | mov $r14 0 // not multi | ||
334 | call #mmctx_xfer | ||
335 | |||
336 | // per-TPC mmio context | ||
337 | xbit $r10 $flags $p1 // direction | ||
338 | or $r10 4 // last | ||
339 | mov $r11 0x4000 | ||
340 | sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 | ||
341 | ld b32 $r12 D[$r0 + #gpc_id] | ||
342 | shl b32 $r12 15 | ||
343 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 | ||
344 | ld b32 $r12 D[$r0 + #tpc_mmio_list_head] | ||
345 | ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] | ||
346 | ld b32 $r15 D[$r0 + #tpc_mask] | ||
347 | mov $r14 0x800 // stride = 0x800 | ||
348 | call #mmctx_xfer | ||
349 | |||
350 | // wait for strands to finish | ||
351 | call #strand_wait | ||
352 | |||
353 | // if load, or a save without a load following, do some | ||
354 | // unknown stuff that's done after finishing a block of | ||
355 | // strand commands | ||
356 | bra $p1 #ctx_xfer_post | ||
357 | bra not $p2 #ctx_xfer_done | ||
358 | ctx_xfer_post: | ||
359 | mov $r1 0x4afc | ||
360 | sethi $r1 0x20000 | ||
361 | mov $r2 0xd | ||
362 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d | ||
363 | call #strand_wait | ||
364 | |||
365 | // mark completion in HUB's barrier | ||
366 | ctx_xfer_done: | ||
367 | call #hub_barrier_done | ||
368 | ret | ||
369 | #endif | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc index 61a6b43ece19..c2d9e59bb58f 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc | |||
@@ -1,6 +1,5 @@ | |||
1 | /* fuc microcode for nvc0 PGRAPH/GPC | 1 | /* |
2 | * | 2 | * Copyright 2013 Red Hat Inc. |
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | 3 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -20,32 +19,17 @@ | |||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * | 21 | * |
23 | * Authors: Ben Skeggs | 22 | * Authors: Ben Skeggs <bskeggs@redhat.com> |
24 | */ | 23 | */ |
25 | 24 | ||
26 | /* To build: | 25 | #define NVGF |
27 | * m4 gpcnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o gpcnvc0.fuc.h | 26 | #include "macros.fuc" |
28 | */ | ||
29 | |||
30 | /* TODO | ||
31 | * - bracket certain functions with scratch writes, useful for debugging | ||
32 | * - watchdog timer around ctx operations | ||
33 | */ | ||
34 | 27 | ||
35 | .section #nvc0_grgpc_data | 28 | .section #nvc0_grgpc_data |
36 | include(`nvc0.fuc') | 29 | #define INCLUDE_DATA |
37 | gpc_id: .b32 0 | 30 | #include "com.fuc" |
38 | gpc_mmio_list_head: .b32 0 | 31 | #include "gpc.fuc" |
39 | gpc_mmio_list_tail: .b32 0 | ||
40 | |||
41 | tpc_count: .b32 0 | ||
42 | tpc_mask: .b32 0 | ||
43 | tpc_mmio_list_head: .b32 0 | ||
44 | tpc_mmio_list_tail: .b32 0 | ||
45 | 32 | ||
46 | cmd_queue: queue_init | ||
47 | |||
48 | // chipset descriptions | ||
49 | chipsets: | 33 | chipsets: |
50 | .b8 0xc0 0 0 0 | 34 | .b8 0xc0 0 0 0 |
51 | .b16 #nvc0_gpc_mmio_head | 35 | .b16 #nvc0_gpc_mmio_head |
@@ -159,335 +143,12 @@ nvc1_tpc_mmio_tail: | |||
159 | mmctx_data(0x000424, 2); | 143 | mmctx_data(0x000424, 2); |
160 | mmctx_data(0x0006e0, 1); | 144 | mmctx_data(0x0006e0, 1); |
161 | nvd9_tpc_mmio_tail: | 145 | nvd9_tpc_mmio_tail: |
146 | #undef INCLUDE_DATA | ||
162 | 147 | ||
163 | .section #nvc0_grgpc_code | 148 | .section #nvc0_grgpc_code |
149 | #define INCLUDE_CODE | ||
164 | bra #init | 150 | bra #init |
165 | define(`include_code') | 151 | #include "com.fuc" |
166 | include(`nvc0.fuc') | 152 | #include "gpc.fuc" |
167 | |||
168 | // reports an exception to the host | ||
169 | // | ||
170 | // In: $r15 error code (see nvc0.fuc) | ||
171 | // | ||
172 | error: | ||
173 | push $r14 | ||
174 | mov $r14 -0x67ec // 0x9814 | ||
175 | sethi $r14 0x400000 | ||
176 | call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code | ||
177 | add b32 $r14 0x41c | ||
178 | mov $r15 1 | ||
179 | call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET | ||
180 | pop $r14 | ||
181 | ret | ||
182 | |||
183 | // GPC fuc initialisation, executed by triggering ucode start, will | ||
184 | // fall through to main loop after completion. | ||
185 | // | ||
186 | // Input: | ||
187 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
188 | // CC_SCRATCH[1]: context base | ||
189 | // | ||
190 | // Output: | ||
191 | // CC_SCRATCH[0]: | ||
192 | // 31:31: set to signal completion | ||
193 | // CC_SCRATCH[1]: | ||
194 | // 31:0: GPC context size | ||
195 | // | ||
196 | init: | ||
197 | clear b32 $r0 | ||
198 | mov $sp $r0 | ||
199 | |||
200 | // enable fifo access | ||
201 | mov $r1 0x1200 | ||
202 | mov $r2 2 | ||
203 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
204 | |||
205 | // setup i0 handler, and route all interrupts to it | ||
206 | mov $r1 #ih | ||
207 | mov $iv0 $r1 | ||
208 | mov $r1 0x400 | ||
209 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
210 | |||
211 | // enable fifo interrupt | ||
212 | mov $r2 4 | ||
213 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
214 | |||
215 | // enable interrupts | ||
216 | bset $flags ie0 | ||
217 | |||
218 | // figure out which GPC we are, and how many TPCs we have | ||
219 | mov $r1 0x608 | ||
220 | shl b32 $r1 6 | ||
221 | iord $r2 I[$r1 + 0x000] // UNITS | ||
222 | mov $r3 1 | ||
223 | and $r2 0x1f | ||
224 | shl b32 $r3 $r2 | ||
225 | sub b32 $r3 1 | ||
226 | st b32 D[$r0 + #tpc_count] $r2 | ||
227 | st b32 D[$r0 + #tpc_mask] $r3 | ||
228 | add b32 $r1 0x400 | ||
229 | iord $r2 I[$r1 + 0x000] // MYINDEX | ||
230 | st b32 D[$r0 + #gpc_id] $r2 | ||
231 | |||
232 | // find context data for this chipset | ||
233 | mov $r2 0x800 | ||
234 | shl b32 $r2 6 | ||
235 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
236 | mov $r1 #chipsets - 12 | ||
237 | init_find_chipset: | ||
238 | add b32 $r1 12 | ||
239 | ld b32 $r3 D[$r1 + 0x00] | ||
240 | cmpu b32 $r3 $r2 | ||
241 | bra e #init_context | ||
242 | cmpu b32 $r3 0 | ||
243 | bra ne #init_find_chipset | ||
244 | // unknown chipset | ||
245 | ret | ||
246 | |||
247 | // initialise context base, and size tracking | ||
248 | init_context: | ||
249 | mov $r2 0x800 | ||
250 | shl b32 $r2 6 | ||
251 | iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base | ||
252 | clear b32 $r3 // track GPC context size here | ||
253 | |||
254 | // set mmctx base addresses now so we don't have to do it later, | ||
255 | // they don't currently ever change | ||
256 | mov $r4 0x700 | ||
257 | shl b32 $r4 6 | ||
258 | shr b32 $r5 $r2 8 | ||
259 | iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE | ||
260 | iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE | ||
261 | |||
262 | // calculate GPC mmio context size, store the chipset-specific | ||
263 | // mmio list pointers somewhere we can get at them later without | ||
264 | // re-parsing the chipset list | ||
265 | clear b32 $r14 | ||
266 | clear b32 $r15 | ||
267 | ld b16 $r14 D[$r1 + 4] | ||
268 | ld b16 $r15 D[$r1 + 6] | ||
269 | st b16 D[$r0 + #gpc_mmio_list_head] $r14 | ||
270 | st b16 D[$r0 + #gpc_mmio_list_tail] $r15 | ||
271 | call #mmctx_size | ||
272 | add b32 $r2 $r15 | ||
273 | add b32 $r3 $r15 | ||
274 | |||
275 | // calculate per-TPC mmio context size, store the list pointers | ||
276 | ld b16 $r14 D[$r1 + 8] | ||
277 | ld b16 $r15 D[$r1 + 10] | ||
278 | st b16 D[$r0 + #tpc_mmio_list_head] $r14 | ||
279 | st b16 D[$r0 + #tpc_mmio_list_tail] $r15 | ||
280 | call #mmctx_size | ||
281 | ld b32 $r14 D[$r0 + #tpc_count] | ||
282 | mulu $r14 $r15 | ||
283 | add b32 $r2 $r14 | ||
284 | add b32 $r3 $r14 | ||
285 | |||
286 | // round up base/size to 256 byte boundary (for strand SWBASE) | ||
287 | add b32 $r4 0x1300 | ||
288 | shr b32 $r3 2 | ||
289 | iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? | ||
290 | shr b32 $r2 8 | ||
291 | shr b32 $r3 6 | ||
292 | add b32 $r2 1 | ||
293 | add b32 $r3 1 | ||
294 | shl b32 $r2 8 | ||
295 | shl b32 $r3 8 | ||
296 | |||
297 | // calculate size of strand context data | ||
298 | mov b32 $r15 $r2 | ||
299 | call #strand_ctx_init | ||
300 | add b32 $r3 $r15 | ||
301 | |||
302 | // save context size, and tell HUB we're done | ||
303 | mov $r1 0x800 | ||
304 | shl b32 $r1 6 | ||
305 | iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size | ||
306 | add b32 $r1 0x800 | ||
307 | clear b32 $r2 | ||
308 | bset $r2 31 | ||
309 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
310 | |||
311 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
312 | // handler, pulls a command from the queue and executes its handler | ||
313 | // | ||
314 | main: | ||
315 | bset $flags $p0 | ||
316 | sleep $p0 | ||
317 | mov $r13 #cmd_queue | ||
318 | call #queue_get | ||
319 | bra $p1 #main | ||
320 | |||
321 | // 0x0000-0x0003 are all context transfers | ||
322 | cmpu b32 $r14 0x04 | ||
323 | bra nc #main_not_ctx_xfer | ||
324 | // fetch $flags and mask off $p1/$p2 | ||
325 | mov $r1 $flags | ||
326 | mov $r2 0x0006 | ||
327 | not b32 $r2 | ||
328 | and $r1 $r2 | ||
329 | // set $p1/$p2 according to transfer type | ||
330 | shl b32 $r14 1 | ||
331 | or $r1 $r14 | ||
332 | mov $flags $r1 | ||
333 | // transfer context data | ||
334 | call #ctx_xfer | ||
335 | bra #main | ||
336 | |||
337 | main_not_ctx_xfer: | ||
338 | shl b32 $r15 $r14 16 | ||
339 | or $r15 E_BAD_COMMAND | ||
340 | call #error | ||
341 | bra #main | ||
342 | |||
343 | // interrupt handler | ||
344 | ih: | ||
345 | push $r8 | ||
346 | mov $r8 $flags | ||
347 | push $r8 | ||
348 | push $r9 | ||
349 | push $r10 | ||
350 | push $r11 | ||
351 | push $r13 | ||
352 | push $r14 | ||
353 | push $r15 | ||
354 | |||
355 | // incoming fifo command? | ||
356 | iord $r10 I[$r0 + 0x200] // INTR | ||
357 | and $r11 $r10 0x00000004 | ||
358 | bra e #ih_no_fifo | ||
359 | // queue incoming fifo command for later processing | ||
360 | mov $r11 0x1900 | ||
361 | mov $r13 #cmd_queue | ||
362 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
363 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
364 | call #queue_put | ||
365 | add b32 $r11 0x400 | ||
366 | mov $r14 1 | ||
367 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
368 | |||
369 | // ack, and wake up main() | ||
370 | ih_no_fifo: | ||
371 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
372 | |||
373 | pop $r15 | ||
374 | pop $r14 | ||
375 | pop $r13 | ||
376 | pop $r11 | ||
377 | pop $r10 | ||
378 | pop $r9 | ||
379 | pop $r8 | ||
380 | mov $flags $r8 | ||
381 | pop $r8 | ||
382 | bclr $flags $p0 | ||
383 | iret | ||
384 | |||
385 | // Set this GPC's bit in HUB_BAR, used to signal completion of various | ||
386 | // activities to the HUB fuc | ||
387 | // | ||
388 | hub_barrier_done: | ||
389 | mov $r15 1 | ||
390 | ld b32 $r14 D[$r0 + #gpc_id] | ||
391 | shl b32 $r15 $r14 | ||
392 | mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET | ||
393 | sethi $r14 0x400000 | ||
394 | call #nv_wr32 | ||
395 | ret | ||
396 | |||
397 | // Disables various things, waits a bit, and re-enables them.. | ||
398 | // | ||
399 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
400 | // good description for the bits we turn off? Anyways, without this, | ||
401 | // funny things happen. | ||
402 | // | ||
403 | ctx_redswitch: | ||
404 | mov $r14 0x614 | ||
405 | shl b32 $r14 6 | ||
406 | mov $r15 0x020 | ||
407 | iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER | ||
408 | mov $r15 8 | ||
409 | ctx_redswitch_delay: | ||
410 | sub b32 $r15 1 | ||
411 | bra ne #ctx_redswitch_delay | ||
412 | mov $r15 0xa20 | ||
413 | iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER | ||
414 | ret | ||
415 | |||
416 | // Transfer GPC context data between GPU and storage area | ||
417 | // | ||
418 | // In: $r15 context base address | ||
419 | // $p1 clear on save, set on load | ||
420 | // $p2 set if opposite direction done/will be done, so: | ||
421 | // on save it means: "a load will follow this save" | ||
423 | // on load it means: "a save preceded this load" | ||
423 | // | ||
424 | ctx_xfer: | ||
425 | // set context base address | ||
426 | mov $r1 0xa04 | ||
427 | shl b32 $r1 6 | ||
428 | iowr I[$r1 + 0x000] $r15// MEM_BASE | ||
429 | bra not $p1 #ctx_xfer_not_load | ||
430 | call #ctx_redswitch | ||
431 | ctx_xfer_not_load: | ||
432 | |||
433 | // strands | ||
434 | mov $r1 0x4afc | ||
435 | sethi $r1 0x20000 | ||
436 | mov $r2 0xc | ||
437 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
438 | call #strand_wait | ||
439 | mov $r2 0x47fc | ||
440 | sethi $r2 0x20000 | ||
441 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
442 | xbit $r2 $flags $p1 | ||
443 | add b32 $r2 3 | ||
444 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
445 | |||
446 | // mmio context | ||
447 | xbit $r10 $flags $p1 // direction | ||
448 | or $r10 2 // first | ||
449 | mov $r11 0x0000 | ||
450 | sethi $r11 0x500000 | ||
451 | ld b32 $r12 D[$r0 + #gpc_id] | ||
452 | shl b32 $r12 15 | ||
453 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn | ||
454 | ld b32 $r12 D[$r0 + #gpc_mmio_list_head] | ||
455 | ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] | ||
456 | mov $r14 0 // not multi | ||
457 | call #mmctx_xfer | ||
458 | |||
459 | // per-TPC mmio context | ||
460 | xbit $r10 $flags $p1 // direction | ||
461 | or $r10 4 // last | ||
462 | mov $r11 0x4000 | ||
463 | sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 | ||
464 | ld b32 $r12 D[$r0 + #gpc_id] | ||
465 | shl b32 $r12 15 | ||
466 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 | ||
467 | ld b32 $r12 D[$r0 + #tpc_mmio_list_head] | ||
468 | ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] | ||
469 | ld b32 $r15 D[$r0 + #tpc_mask] | ||
470 | mov $r14 0x800 // stride = 0x800 | ||
471 | call #mmctx_xfer | ||
472 | |||
473 | // wait for strands to finish | ||
474 | call #strand_wait | ||
475 | |||
476 | // if load, or a save without a load following, do some | ||
477 | // unknown stuff that's done after finishing a block of | ||
478 | // strand commands | ||
479 | bra $p1 #ctx_xfer_post | ||
480 | bra not $p2 #ctx_xfer_done | ||
481 | ctx_xfer_post: | ||
482 | mov $r1 0x4afc | ||
483 | sethi $r1 0x20000 | ||
484 | mov $r2 0xd | ||
485 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d | ||
486 | call #strand_wait | ||
487 | |||
488 | // mark completion in HUB's barrier | ||
489 | ctx_xfer_done: | ||
490 | call #hub_barrier_done | ||
491 | ret | ||
492 | |||
493 | .align 256 | 153 | .align 256 |
154 | #undef INCLUDE_CODE | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h index cafcc638042a..66ec1acaadee 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnvc0.fuc.h | |||
@@ -192,7 +192,7 @@ uint32_t nvc0_grgpc_code[] = { | |||
192 | 0x0089d000, | 192 | 0x0089d000, |
193 | 0x081887f1, | 193 | 0x081887f1, |
194 | 0xd00684b6, | 194 | 0xd00684b6, |
195 | /* 0x00e2: wait_done_wait_donez */ | 195 | /* 0x00e2: wait_donez_ne */ |
196 | 0x87f1008a, | 196 | 0x87f1008a, |
197 | 0x84b60400, | 197 | 0x84b60400, |
198 | 0x0088cf06, | 198 | 0x0088cf06, |
@@ -209,7 +209,7 @@ uint32_t nvc0_grgpc_code[] = { | |||
209 | 0x87f10089, | 209 | 0x87f10089, |
210 | 0x84b60818, | 210 | 0x84b60818, |
211 | 0x008ad006, | 211 | 0x008ad006, |
212 | /* 0x011c: wait_done_wait_doneo */ | 212 | /* 0x011c: wait_doneo_e */ |
213 | 0x040087f1, | 213 | 0x040087f1, |
214 | 0xcf0684b6, | 214 | 0xcf0684b6, |
215 | 0x8aff0088, | 215 | 0x8aff0088, |
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc index ccaeb50aa76b..2fc585eeff95 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc | |||
@@ -1,6 +1,5 @@ | |||
1 | /* fuc microcode for nve0 PGRAPH/GPC | 1 | /* |
2 | * | 2 | * Copyright 2013 Red Hat Inc. |
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | 3 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -20,32 +19,17 @@ | |||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * | 21 | * |
23 | * Authors: Ben Skeggs | 22 | * Authors: Ben Skeggs <bskeggs@redhat.com> |
24 | */ | 23 | */ |
25 | 24 | ||
26 | /* To build: | 25 | #define NVGK |
27 | * m4 nve0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grgpc.fuc.h | 26 | #include "macros.fuc" |
28 | */ | ||
29 | |||
30 | /* TODO | ||
31 | * - bracket certain functions with scratch writes, useful for debugging | ||
32 | * - watchdog timer around ctx operations | ||
33 | */ | ||
34 | 27 | ||
35 | .section #nve0_grgpc_data | 28 | .section #nve0_grgpc_data |
36 | include(`nve0.fuc') | 29 | #define INCLUDE_DATA |
37 | gpc_id: .b32 0 | 30 | #include "com.fuc" |
38 | gpc_mmio_list_head: .b32 0 | 31 | #include "gpc.fuc" |
39 | gpc_mmio_list_tail: .b32 0 | ||
40 | |||
41 | tpc_count: .b32 0 | ||
42 | tpc_mask: .b32 0 | ||
43 | tpc_mmio_list_head: .b32 0 | ||
44 | tpc_mmio_list_tail: .b32 0 | ||
45 | 32 | ||
46 | cmd_queue: queue_init | ||
47 | |||
48 | // chipset descriptions | ||
49 | chipsets: | 33 | chipsets: |
50 | .b8 0xe4 0 0 0 | 34 | .b8 0xe4 0 0 0 |
51 | .b16 #nve4_gpc_mmio_head | 35 | .b16 #nve4_gpc_mmio_head |
@@ -182,335 +166,12 @@ mmctx_data(0x000758, 1) | |||
182 | mmctx_data(0x000770, 1) | 166 | mmctx_data(0x000770, 1) |
183 | mmctx_data(0x000778, 2) | 167 | mmctx_data(0x000778, 2) |
184 | nvf0_tpc_mmio_tail: | 168 | nvf0_tpc_mmio_tail: |
169 | #undef INCLUDE_DATA | ||
185 | 170 | ||
186 | .section #nve0_grgpc_code | 171 | .section #nve0_grgpc_code |
172 | #define INCLUDE_CODE | ||
187 | bra #init | 173 | bra #init |
188 | define(`include_code') | 174 | #include "com.fuc" |
189 | include(`nve0.fuc') | 175 | #include "gpc.fuc" |
190 | |||
191 | // reports an exception to the host | ||
192 | // | ||
193 | // In: $r15 error code (see nve0.fuc) | ||
194 | // | ||
195 | error: | ||
196 | push $r14 | ||
197 | mov $r14 -0x67ec // 0x9814 | ||
198 | sethi $r14 0x400000 | ||
199 | call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code | ||
200 | add b32 $r14 0x41c | ||
201 | mov $r15 1 | ||
202 | call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET | ||
203 | pop $r14 | ||
204 | ret | ||
205 | |||
206 | // GPC fuc initialisation, executed by triggering ucode start, will | ||
207 | // fall through to main loop after completion. | ||
208 | // | ||
209 | // Input: | ||
210 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
211 | // CC_SCRATCH[1]: context base | ||
212 | // | ||
213 | // Output: | ||
214 | // CC_SCRATCH[0]: | ||
215 | // 31:31: set to signal completion | ||
216 | // CC_SCRATCH[1]: | ||
217 | // 31:0: GPC context size | ||
218 | // | ||
219 | init: | ||
220 | clear b32 $r0 | ||
221 | mov $sp $r0 | ||
222 | |||
223 | // enable fifo access | ||
224 | mov $r1 0x1200 | ||
225 | mov $r2 2 | ||
226 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
227 | |||
228 | // setup i0 handler, and route all interrupts to it | ||
229 | mov $r1 #ih | ||
230 | mov $iv0 $r1 | ||
231 | mov $r1 0x400 | ||
232 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
233 | |||
234 | // enable fifo interrupt | ||
235 | mov $r2 4 | ||
236 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
237 | |||
238 | // enable interrupts | ||
239 | bset $flags ie0 | ||
240 | |||
241 | // figure out which GPC we are, and how many TPCs we have | ||
242 | mov $r1 0x608 | ||
243 | shl b32 $r1 6 | ||
244 | iord $r2 I[$r1 + 0x000] // UNITS | ||
245 | mov $r3 1 | ||
246 | and $r2 0x1f | ||
247 | shl b32 $r3 $r2 | ||
248 | sub b32 $r3 1 | ||
249 | st b32 D[$r0 + #tpc_count] $r2 | ||
250 | st b32 D[$r0 + #tpc_mask] $r3 | ||
251 | add b32 $r1 0x400 | ||
252 | iord $r2 I[$r1 + 0x000] // MYINDEX | ||
253 | st b32 D[$r0 + #gpc_id] $r2 | ||
254 | |||
255 | // find context data for this chipset | ||
256 | mov $r2 0x800 | ||
257 | shl b32 $r2 6 | ||
258 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
259 | mov $r1 #chipsets - 12 | ||
260 | init_find_chipset: | ||
261 | add b32 $r1 12 | ||
262 | ld b32 $r3 D[$r1 + 0x00] | ||
263 | cmpu b32 $r3 $r2 | ||
264 | bra e #init_context | ||
265 | cmpu b32 $r3 0 | ||
266 | bra ne #init_find_chipset | ||
267 | // unknown chipset | ||
268 | ret | ||
269 | |||
270 | // initialise context base, and size tracking | ||
271 | init_context: | ||
272 | mov $r2 0x800 | ||
273 | shl b32 $r2 6 | ||
274 | iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base | ||
275 | clear b32 $r3 // track GPC context size here | ||
276 | |||
277 | // set mmctx base addresses now so we don't have to do it later, | ||
278 | // they don't currently ever change | ||
279 | mov $r4 0x700 | ||
280 | shl b32 $r4 6 | ||
281 | shr b32 $r5 $r2 8 | ||
282 | iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE | ||
283 | iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE | ||
284 | |||
285 | // calculate GPC mmio context size, store the chipset-specific | ||
286 | // mmio list pointers somewhere we can get at them later without | ||
287 | // re-parsing the chipset list | ||
288 | clear b32 $r14 | ||
289 | clear b32 $r15 | ||
290 | ld b16 $r14 D[$r1 + 4] | ||
291 | ld b16 $r15 D[$r1 + 6] | ||
292 | st b16 D[$r0 + #gpc_mmio_list_head] $r14 | ||
293 | st b16 D[$r0 + #gpc_mmio_list_tail] $r15 | ||
294 | call #mmctx_size | ||
295 | add b32 $r2 $r15 | ||
296 | add b32 $r3 $r15 | ||
297 | |||
298 | // calculate per-TPC mmio context size, store the list pointers | ||
299 | ld b16 $r14 D[$r1 + 8] | ||
300 | ld b16 $r15 D[$r1 + 10] | ||
301 | st b16 D[$r0 + #tpc_mmio_list_head] $r14 | ||
302 | st b16 D[$r0 + #tpc_mmio_list_tail] $r15 | ||
303 | call #mmctx_size | ||
304 | ld b32 $r14 D[$r0 + #tpc_count] | ||
305 | mulu $r14 $r15 | ||
306 | add b32 $r2 $r14 | ||
307 | add b32 $r3 $r14 | ||
308 | |||
309 | // round up base/size to 256 byte boundary (for strand SWBASE) | ||
310 | add b32 $r4 0x1300 | ||
311 | shr b32 $r3 2 | ||
312 | iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? | ||
313 | shr b32 $r2 8 | ||
314 | shr b32 $r3 6 | ||
315 | add b32 $r2 1 | ||
316 | add b32 $r3 1 | ||
317 | shl b32 $r2 8 | ||
318 | shl b32 $r3 8 | ||
319 | |||
320 | // calculate size of strand context data | ||
321 | mov b32 $r15 $r2 | ||
322 | call #strand_ctx_init | ||
323 | add b32 $r3 $r15 | ||
324 | |||
325 | // save context size, and tell HUB we're done | ||
326 | mov $r1 0x800 | ||
327 | shl b32 $r1 6 | ||
328 | iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size | ||
329 | add b32 $r1 0x800 | ||
330 | clear b32 $r2 | ||
331 | bset $r2 31 | ||
332 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
333 | |||
334 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
335 | // handler, pulls a command from the queue and executes its handler | ||
336 | // | ||
337 | main: | ||
338 | bset $flags $p0 | ||
339 | sleep $p0 | ||
340 | mov $r13 #cmd_queue | ||
341 | call #queue_get | ||
342 | bra $p1 #main | ||
343 | |||
344 | // 0x0000-0x0003 are all context transfers | ||
345 | cmpu b32 $r14 0x04 | ||
346 | bra nc #main_not_ctx_xfer | ||
347 | // fetch $flags and mask off $p1/$p2 | ||
348 | mov $r1 $flags | ||
349 | mov $r2 0x0006 | ||
350 | not b32 $r2 | ||
351 | and $r1 $r2 | ||
352 | // set $p1/$p2 according to transfer type | ||
353 | shl b32 $r14 1 | ||
354 | or $r1 $r14 | ||
355 | mov $flags $r1 | ||
356 | // transfer context data | ||
357 | call #ctx_xfer | ||
358 | bra #main | ||
359 | |||
360 | main_not_ctx_xfer: | ||
361 | shl b32 $r15 $r14 16 | ||
362 | or $r15 E_BAD_COMMAND | ||
363 | call #error | ||
364 | bra #main | ||
365 | |||
366 | // interrupt handler | ||
367 | ih: | ||
368 | push $r8 | ||
369 | mov $r8 $flags | ||
370 | push $r8 | ||
371 | push $r9 | ||
372 | push $r10 | ||
373 | push $r11 | ||
374 | push $r13 | ||
375 | push $r14 | ||
376 | push $r15 | ||
377 | |||
378 | // incoming fifo command? | ||
379 | iord $r10 I[$r0 + 0x200] // INTR | ||
380 | and $r11 $r10 0x00000004 | ||
381 | bra e #ih_no_fifo | ||
382 | // queue incoming fifo command for later processing | ||
383 | mov $r11 0x1900 | ||
384 | mov $r13 #cmd_queue | ||
385 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
386 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
387 | call #queue_put | ||
388 | add b32 $r11 0x400 | ||
389 | mov $r14 1 | ||
390 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
391 | |||
392 | // ack, and wake up main() | ||
393 | ih_no_fifo: | ||
394 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
395 | |||
396 | pop $r15 | ||
397 | pop $r14 | ||
398 | pop $r13 | ||
399 | pop $r11 | ||
400 | pop $r10 | ||
401 | pop $r9 | ||
402 | pop $r8 | ||
403 | mov $flags $r8 | ||
404 | pop $r8 | ||
405 | bclr $flags $p0 | ||
406 | iret | ||
407 | |||
408 | // Set this GPC's bit in HUB_BAR, used to signal completion of various | ||
409 | // activities to the HUB fuc | ||
410 | // | ||
411 | hub_barrier_done: | ||
412 | mov $r15 1 | ||
413 | ld b32 $r14 D[$r0 + #gpc_id] | ||
414 | shl b32 $r15 $r14 | ||
415 | mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET | ||
416 | sethi $r14 0x400000 | ||
417 | call #nv_wr32 | ||
418 | ret | ||
419 | |||
420 | // Disables various things, waits a bit, and re-enables them.. | ||
421 | // | ||
422 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
423 | // good description for the bits we turn off? Anyways, without this, | ||
424 | // funny things happen. | ||
425 | // | ||
426 | ctx_redswitch: | ||
427 | mov $r14 0x614 | ||
428 | shl b32 $r14 6 | ||
429 | mov $r15 0x020 | ||
430 | iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER | ||
431 | mov $r15 8 | ||
432 | ctx_redswitch_delay: | ||
433 | sub b32 $r15 1 | ||
434 | bra ne #ctx_redswitch_delay | ||
435 | mov $r15 0xa20 | ||
436 | iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER | ||
437 | ret | ||
438 | |||
439 | // Transfer GPC context data between GPU and storage area | ||
440 | // | ||
441 | // In: $r15 context base address | ||
442 | // $p1 clear on save, set on load | ||
443 | // $p2 set if opposite direction done/will be done, so: | ||
444 | // on save it means: "a load will follow this save" | ||
445 | // on load it means: "a save preceeded this load" | ||
446 | // | ||
447 | ctx_xfer: | ||
448 | // set context base address | ||
449 | mov $r1 0xa04 | ||
450 | shl b32 $r1 6 | ||
451 | iowr I[$r1 + 0x000] $r15// MEM_BASE | ||
452 | bra not $p1 #ctx_xfer_not_load | ||
453 | call #ctx_redswitch | ||
454 | ctx_xfer_not_load: | ||
455 | |||
456 | // strands | ||
457 | mov $r1 0x4afc | ||
458 | sethi $r1 0x20000 | ||
459 | mov $r2 0xc | ||
460 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
461 | call #strand_wait | ||
462 | mov $r2 0x47fc | ||
463 | sethi $r2 0x20000 | ||
464 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
465 | xbit $r2 $flags $p1 | ||
466 | add b32 $r2 3 | ||
467 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
468 | |||
469 | // mmio context | ||
470 | xbit $r10 $flags $p1 // direction | ||
471 | or $r10 2 // first | ||
472 | mov $r11 0x0000 | ||
473 | sethi $r11 0x500000 | ||
474 | ld b32 $r12 D[$r0 + #gpc_id] | ||
475 | shl b32 $r12 15 | ||
476 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn | ||
477 | ld b32 $r12 D[$r0 + #gpc_mmio_list_head] | ||
478 | ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] | ||
479 | mov $r14 0 // not multi | ||
480 | call #mmctx_xfer | ||
481 | |||
482 | // per-TPC mmio context | ||
483 | xbit $r10 $flags $p1 // direction | ||
484 | or $r10 4 // last | ||
485 | mov $r11 0x4000 | ||
486 | sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 | ||
487 | ld b32 $r12 D[$r0 + #gpc_id] | ||
488 | shl b32 $r12 15 | ||
489 | add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 | ||
490 | ld b32 $r12 D[$r0 + #tpc_mmio_list_head] | ||
491 | ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] | ||
492 | ld b32 $r15 D[$r0 + #tpc_mask] | ||
493 | mov $r14 0x800 // stride = 0x800 | ||
494 | call #mmctx_xfer | ||
495 | |||
496 | // wait for strands to finish | ||
497 | call #strand_wait | ||
498 | |||
499 | // if load, or a save without a load following, do some | ||
500 | // unknown stuff that's done after finishing a block of | ||
501 | // strand commands | ||
502 | bra $p1 #ctx_xfer_post | ||
503 | bra not $p2 #ctx_xfer_done | ||
504 | ctx_xfer_post: | ||
505 | mov $r1 0x4afc | ||
506 | sethi $r1 0x20000 | ||
507 | mov $r2 0xd | ||
508 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d | ||
509 | call #strand_wait | ||
510 | |||
511 | // mark completion in HUB's barrier | ||
512 | ctx_xfer_done: | ||
513 | call #hub_barrier_done | ||
514 | ret | ||
515 | |||
516 | .align 256 | 176 | .align 256 |
177 | #undef INCLUDE_CODE | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h index 419bd5da1e00..504ae96cd3dd 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h | |||
@@ -223,7 +223,7 @@ uint32_t nve0_grgpc_code[] = { | |||
223 | 0x0089d000, | 223 | 0x0089d000, |
224 | 0x081887f1, | 224 | 0x081887f1, |
225 | 0xd00684b6, | 225 | 0xd00684b6, |
226 | /* 0x00e2: wait_done_wait_donez */ | 226 | /* 0x00e2: wait_donez_ne */ |
227 | 0x87f1008a, | 227 | 0x87f1008a, |
228 | 0x84b60400, | 228 | 0x84b60400, |
229 | 0x0088cf06, | 229 | 0x0088cf06, |
@@ -240,7 +240,7 @@ uint32_t nve0_grgpc_code[] = { | |||
240 | 0x87f10089, | 240 | 0x87f10089, |
241 | 0x84b60818, | 241 | 0x84b60818, |
242 | 0x008ad006, | 242 | 0x008ad006, |
243 | /* 0x011c: wait_done_wait_doneo */ | 243 | /* 0x011c: wait_doneo_e */ |
244 | 0x040087f1, | 244 | 0x040087f1, |
245 | 0xcf0684b6, | 245 | 0xcf0684b6, |
246 | 0x8aff0088, | 246 | 0x8aff0088, |
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hub.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hub.fuc new file mode 100644 index 000000000000..5c68bf6d69aa --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hub.fuc | |||
@@ -0,0 +1,755 @@ | |||
1 | /* fuc microcode for nvc0 PGRAPH/HUB | ||
2 | * | ||
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
9 | * and/or sell copies of the Software, and to permit persons to whom the | ||
10 | * Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice shall be included in | ||
13 | * all copies or substantial portions of the Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
21 | * OTHER DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: Ben Skeggs | ||
24 | */ | ||
25 | |||
26 | #ifdef INCLUDE_DATA | ||
27 | gpc_count: .b32 0 // enabled GPC count, stored by init | ||
28 | rop_count: .b32 0 // enabled ROP count, stored by init | ||
29 | cmd_queue: queue_init // command queue: ih enqueues (queue_put), main dequeues (queue_get) | ||
30 | hub_mmio_list_head: .b32 0 // start of the matched chipset's mmio list, stored by init | ||
31 | hub_mmio_list_tail: .b32 0 // end of the matched chipset's mmio list, stored by init | ||
32 | |||
33 | ctx_current: .b32 0 // value ctx_load assembles from the channel header and uses as MEM_BASE | ||
34 | |||
35 | .align 256 | ||
36 | chan_data: // ctx_load transfers the 256-byte context header to this address | ||
37 | chan_mmio_count: .b32 0 // NOTE(review): presumably the length of the per-context mmio list - confirm | ||
38 | chan_mmio_address: .b32 0 // read by ctx_mmio_exec and written to MEM_BASE | ||
39 | |||
40 | .align 256 | ||
41 | xfer_data: .skip 256 // transfer buffer; ctx_load reads 16 bytes of the channel header into it | ||
42 | |||
43 | #endif | ||
44 | |||
45 | #ifdef INCLUDE_CODE | ||
46 | // reports an exception to the host: stores the code in CC_SCRATCH[5], then raises INTR_UP bit 0 | ||
47 | // | ||
48 | // In: $r15 error code (an E_* value such as E_BAD_COMMAND; main also packs the command into the upper 16 bits) | ||
49 | // Out: $r15 is overwritten with 1, $r14 is preserved | ||
50 | error: | ||
51 | push $r14 | ||
52 | mov $r14 0x814 | ||
53 | shl b32 $r14 6 | ||
54 | iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code | ||
55 | mov $r14 0xc1c | ||
56 | shl b32 $r14 6 | ||
57 | mov $r15 1 // bit 0 only | ||
58 | iowr I[$r14 + 0x000] $r15 // INTR_UP_SET | ||
59 | pop $r14 | ||
60 | ret | ||
61 | |||
62 | // HUB fuc initialisation, executed by triggering ucode start, will | ||
63 | // fall through to main loop after completion. | ||
64 | // | ||
65 | // Input: | ||
66 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
67 | // | ||
68 | // Output: | ||
69 | // CC_SCRATCH[0]: | ||
70 | // 31:31: set to signal completion | ||
71 | // CC_SCRATCH[1]: | ||
72 | // 31:0: total PGRAPH context size | ||
73 | // | ||
74 | init: | ||
75 | clear b32 $r0 // $r0 is used as a constant zero from here on | ||
76 | mov $sp $r0 | ||
77 | mov $xdbase $r0 | ||
78 | |||
79 | // enable fifo access | ||
80 | mov $r1 0x1200 | ||
81 | mov $r2 2 | ||
82 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
83 | |||
84 | // setup i0 handler, and route all interrupts to it | ||
85 | mov $r1 #ih | ||
86 | mov $iv0 $r1 | ||
87 | mov $r1 0x400 | ||
88 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
89 | |||
90 | // route HUB_CHANNEL_SWITCH to fuc interrupt 8 | ||
91 | mov $r3 0x404 | ||
92 | shl b32 $r3 6 | ||
93 | mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 | ||
94 | iowr I[$r3 + 0x000] $r2 | ||
95 | |||
96 | // not sure what these are, route them because NVIDIA does, and | ||
97 | // the IRQ handler will signal the host if we ever get one.. we | ||
98 | // may find out if/why we need to handle these if so.. | ||
99 | // | ||
100 | mov $r2 0x2004 | ||
101 | iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 | ||
102 | mov $r2 0x200b | ||
103 | iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 | ||
104 | mov $r2 0x200c | ||
105 | iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 | ||
106 | |||
107 | // enable all INTR_UP interrupts | ||
108 | mov $r2 0xc24 | ||
109 | shl b32 $r2 6 | ||
110 | not b32 $r3 $r0 // $r3 = ~0 | ||
111 | iowr I[$r2] $r3 | ||
112 | |||
113 | // enable fifo, ctxsw, 9, 10, 15 interrupts | ||
114 | mov $r2 -0x78fc // 0x8704 | ||
115 | sethi $r2 0 // presumably clears the sign-extended upper half, leaving 0x00008704 | ||
116 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
117 | |||
118 | // fifo level triggered, rest edge | ||
119 | sub b32 $r1 0x100 // $r1 = 0x300 | ||
120 | mov $r2 4 // bit 2, the fifo interrupt tested by ih | ||
121 | iowr I[$r1] $r2 | ||
122 | |||
123 | // enable interrupts | ||
124 | bset $flags ie0 | ||
125 | |||
126 | // fetch enabled GPC/ROP counts | ||
127 | mov $r14 -0x69fc // 0x409604 | ||
128 | sethi $r14 0x400000 | ||
129 | call #nv_rd32 // result is consumed from $r15 | ||
130 | extr $r1 $r15 16:20 | ||
131 | st b32 D[$r0 + #rop_count] $r1 | ||
132 | and $r15 0x1f | ||
133 | st b32 D[$r0 + #gpc_count] $r15 | ||
134 | |||
135 | // set BAR_REQMASK to GPC mask | ||
136 | mov $r1 1 | ||
137 | shl b32 $r1 $r15 | ||
138 | sub b32 $r1 1 // $r1 = (1 << gpc_count) - 1 | ||
139 | mov $r2 0x40c | ||
140 | shl b32 $r2 6 | ||
141 | iowr I[$r2 + 0x000] $r1 | ||
142 | iowr I[$r2 + 0x100] $r1 | ||
143 | |||
144 | // find context data for this chipset | ||
145 | mov $r2 0x800 | ||
146 | shl b32 $r2 6 | ||
147 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
148 | mov $r15 #chipsets - 8 // one 8-byte entry before the table; the loop advances first | ||
149 | init_find_chipset: | ||
150 | add b32 $r15 8 | ||
151 | ld b32 $r3 D[$r15 + 0x00] // chipset id of this entry | ||
152 | cmpu b32 $r3 $r2 | ||
153 | bra e #init_context | ||
154 | cmpu b32 $r3 0 // an id of zero ends the search | ||
155 | bra ne #init_find_chipset | ||
156 | // unknown chipset | ||
157 | ret | ||
158 | |||
159 | // context size calculation, reserve first 256 bytes for use by fuc | ||
160 | init_context: | ||
161 | mov $r1 256 // $r1 accumulates the context size | ||
162 | |||
163 | // calculate size of mmio context data | ||
164 | ld b16 $r14 D[$r15 + 4] // mmio list head (16-bit field of the chipset entry) | ||
165 | ld b16 $r15 D[$r15 + 6] // mmio list tail | ||
166 | sethi $r14 0 // NOTE(review): presumably clears stale upper bits left in $r14 from the 0x409604 read above - confirm ld b16 semantics | ||
167 | st b32 D[$r0 + #hub_mmio_list_head] $r14 | ||
168 | st b32 D[$r0 + #hub_mmio_list_tail] $r15 | ||
169 | call #mmctx_size // size is consumed from $r15 below | ||
170 | |||
171 | // set mmctx base addresses now so we don't have to do it later, | ||
172 | // they don't (currently) ever change | ||
173 | mov $r3 0x700 | ||
174 | shl b32 $r3 6 | ||
175 | shr b32 $r4 $r1 8 // base is programmed in 256-byte units | ||
176 | iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE | ||
177 | iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE | ||
178 | add b32 $r3 0x1300 | ||
179 | add b32 $r1 $r15 | ||
180 | shr b32 $r15 2 // size / 4 | ||
181 | iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!? | ||
182 | |||
183 | // strands, base offset needs to be aligned to 256 bytes | ||
184 | shr b32 $r1 8 | ||
185 | add b32 $r1 1 // always advances to the next 256-byte boundary, even if already aligned | ||
186 | shl b32 $r1 8 | ||
187 | mov b32 $r15 $r1 | ||
188 | call #strand_ctx_init // strand context size is consumed from $r15 below | ||
189 | add b32 $r1 $r15 | ||
190 | |||
191 | // initialise each GPC in sequence by passing in the offset of its | ||
192 | // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which | ||
193 | // has previously been uploaded by the host) running. | ||
194 | // | ||
195 | // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 | ||
196 | // when it has completed, and return the size of its context data | ||
197 | // in GPCn_CC_SCRATCH[1] | ||
198 | // | ||
199 | ld b32 $r3 D[$r0 + #gpc_count] // loop counter | ||
200 | mov $r4 0x2000 | ||
201 | sethi $r4 0x500000 // $r4 = 0x502000, register base for the first GPC | ||
202 | init_gpc: | ||
203 | // setup, and start GPC ucode running | ||
204 | add b32 $r14 $r4 0x804 | ||
205 | mov b32 $r15 $r1 | ||
206 | call #nv_wr32 // CC_SCRATCH[1] = ctx offset | ||
207 | add b32 $r14 $r4 0x800 | ||
208 | mov b32 $r15 $r2 | ||
209 | call #nv_wr32 // CC_SCRATCH[0] = chipset | ||
210 | add b32 $r14 $r4 0x10c | ||
211 | clear b32 $r15 | ||
212 | call #nv_wr32 // base + 0x10c = 0 | ||
213 | add b32 $r14 $r4 0x104 | ||
214 | call #nv_wr32 // ENTRY | ||
215 | add b32 $r14 $r4 0x100 | ||
216 | mov $r15 2 // CTRL_START_TRIGGER | ||
217 | call #nv_wr32 // CTRL | ||
218 | |||
219 | // wait for it to complete, and adjust context size | ||
220 | add b32 $r14 $r4 0x800 | ||
221 | init_gpc_wait: | ||
222 | call #nv_rd32 | ||
223 | xbit $r15 $r15 31 | ||
224 | bra e #init_gpc_wait // spin until bit 31 of GPCn_CC_SCRATCH[0] reads set | ||
225 | add b32 $r14 $r4 0x804 | ||
226 | call #nv_rd32 // GPCn_CC_SCRATCH[1] = size of that GPC's context | ||
227 | add b32 $r1 $r15 | ||
228 | |||
229 | // next! | ||
230 | add b32 $r4 0x8000 // per-GPC register stride | ||
231 | sub b32 $r3 1 | ||
232 | bra ne #init_gpc | ||
233 | |||
234 | // save context size, and tell host we're ready | ||
235 | mov $r2 0x800 | ||
236 | shl b32 $r2 6 | ||
237 | iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size | ||
238 | add b32 $r2 0x800 | ||
239 | clear b32 $r1 | ||
240 | bset $r1 31 | ||
241 | iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 | ||
242 | |||
243 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
244 | // handler, pulls a command from the queue and executes its handler | ||
245 | // | ||
246 | main: | ||
247 | // sleep until we have something to do | ||
248 | bset $flags $p0 | ||
249 | sleep $p0 // ih clears $p0 before returning, which ends the sleep | ||
250 | mov $r13 #cmd_queue | ||
251 | call #queue_get // command is consumed from $r14, its data from $r15 | ||
252 | bra $p1 #main // NOTE(review): presumably $p1 set means nothing was dequeued - confirm against queue_get | ||
253 | |||
254 | // context switch, requested by GPU? | ||
255 | cmpu b32 $r14 0x4001 // 0x4001 is the command ih enqueues for a ctxsw interrupt | ||
256 | bra ne #main_not_ctx_switch | ||
257 | trace_set(T_AUTO) | ||
258 | mov $r1 0xb00 | ||
259 | shl b32 $r1 6 | ||
260 | iord $r2 I[$r1 + 0x100] // CHAN_NEXT | ||
261 | iord $r1 I[$r1 + 0x000] // CHAN_CUR | ||
262 | |||
263 | xbit $r3 $r1 31 | ||
264 | bra e #chsw_no_prev // bit 31 of CHAN_CUR clear | ||
265 | xbit $r3 $r2 31 | ||
266 | bra e #chsw_prev_no_next // bit 31 of CHAN_NEXT clear | ||
267 | push $r2 // keep CHAN_NEXT across the save | ||
268 | mov b32 $r2 $r1 | ||
269 | trace_set(T_SAVE) | ||
270 | bclr $flags $p1 // $p1 clear for the save pass | ||
271 | bset $flags $p2 | ||
272 | call #ctx_xfer | ||
273 | trace_clr(T_SAVE); | ||
274 | pop $r2 | ||
275 | trace_set(T_LOAD); | ||
276 | bset $flags $p1 // $p1 set for the load pass; $p2 is left as set above | ||
277 | call #ctx_xfer | ||
278 | trace_clr(T_LOAD); | ||
279 | bra #chsw_done | ||
280 | chsw_prev_no_next: | ||
281 | push $r2 | ||
282 | mov b32 $r2 $r1 | ||
283 | bclr $flags $p1 | ||
284 | bclr $flags $p2 | ||
285 | call #ctx_xfer | ||
286 | pop $r2 | ||
287 | mov $r1 0xb00 | ||
288 | shl b32 $r1 6 | ||
289 | iowr I[$r1] $r2 // CHAN_CUR = the CHAN_NEXT value read above | ||
290 | bra #chsw_done | ||
291 | chsw_no_prev: | ||
292 | xbit $r3 $r2 31 | ||
293 | bra e #chsw_done // neither value has bit 31 set: nothing to transfer | ||
294 | bset $flags $p1 | ||
295 | bclr $flags $p2 | ||
296 | call #ctx_xfer | ||
297 | |||
298 | // ack the context switch request | ||
299 | chsw_done: | ||
300 | mov $r1 0xb0c | ||
301 | shl b32 $r1 6 | ||
302 | mov $r2 1 | ||
303 | iowr I[$r1 + 0x000] $r2 // 0x409b0c | ||
304 | trace_clr(T_AUTO) | ||
305 | bra #main | ||
306 | |||
307 | // request to set current channel? (*not* a context switch) | ||
308 | main_not_ctx_switch: | ||
309 | cmpu b32 $r14 0x0001 | ||
310 | bra ne #main_not_ctx_chan | ||
311 | mov b32 $r2 $r15 // command data is passed to ctx_chan as the channel address | ||
312 | call #ctx_chan | ||
313 | bra #main_done | ||
314 | |||
315 | // request to store current channel context? | ||
316 | main_not_ctx_chan: | ||
317 | cmpu b32 $r14 0x0002 | ||
318 | bra ne #main_not_ctx_save | ||
319 | trace_set(T_SAVE) | ||
320 | bclr $flags $p1 | ||
321 | bclr $flags $p2 | ||
322 | call #ctx_xfer | ||
323 | trace_clr(T_SAVE) | ||
324 | bra #main_done | ||
325 | |||
326 | main_not_ctx_save: | ||
327 | shl b32 $r15 $r14 16 | ||
328 | or $r15 E_BAD_COMMAND // $r15 = (command << 16) | E_BAD_COMMAND | ||
329 | call #error | ||
330 | bra #main // unlike handled commands, no completion bit is written | ||
331 | |||
332 | main_done: | ||
333 | mov $r1 0x820 | ||
334 | shl b32 $r1 6 | ||
335 | clear b32 $r2 | ||
336 | bset $r2 31 | ||
337 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
338 | bra #main | ||
339 | |||
340 | // interrupt handler: queues fifo commands and ctxsw requests for main(), forwards any other pending interrupt to the host via INTR_UP_SET, acks what was read from INTR, and clears $p0 to wake main(). $flags and $r8-$r11, $r13-$r15 are saved on entry and restored on exit. | ||
341 | ih: | ||
342 | push $r8 | ||
343 | mov $r8 $flags | ||
344 | push $r8 // saved copy of $flags | ||
345 | push $r9 | ||
346 | push $r10 | ||
347 | push $r11 | ||
348 | push $r13 | ||
349 | push $r14 | ||
350 | push $r15 | ||
351 | |||
352 | // incoming fifo command? | ||
353 | iord $r10 I[$r0 + 0x200] // INTR | ||
354 | and $r11 $r10 0x00000004 | ||
355 | bra e #ih_no_fifo | ||
356 | // queue incoming fifo command for later processing | ||
357 | mov $r11 0x1900 | ||
358 | mov $r13 #cmd_queue | ||
359 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
360 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
361 | call #queue_put | ||
362 | add b32 $r11 0x400 | ||
363 | mov $r14 1 | ||
364 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
365 | |||
366 | // context switch request? | ||
367 | ih_no_fifo: | ||
368 | and $r11 $r10 0x00000100 | ||
369 | bra e #ih_no_ctxsw | ||
370 | // enqueue a context switch for later processing | ||
371 | mov $r13 #cmd_queue | ||
372 | mov $r14 0x4001 | ||
373 | call #queue_put | ||
374 | |||
375 | // anything we didn't handle, bring it to the host's attention | ||
376 | ih_no_ctxsw: | ||
377 | mov $r11 0x104 // bits handled above: fifo (0x004) and ctxsw (0x100) | ||
378 | not b32 $r11 | ||
379 | and $r11 $r10 $r11 | ||
380 | bra e #ih_no_other | ||
381 | mov $r9 0xc1c // address goes in $r9 (saved above, dead here): $r10 must keep the INTR value for the ack below | ||
382 | shl b32 $r9 6 | ||
383 | iowr I[$r9] $r11 // INTR_UP_SET | ||
384 | |||
385 | // ack, and wake up main() | ||
386 | ih_no_other: | ||
387 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
388 | |||
389 | pop $r15 | ||
390 | pop $r14 | ||
391 | pop $r13 | ||
392 | pop $r11 | ||
393 | pop $r10 | ||
394 | pop $r9 | ||
395 | pop $r8 | ||
396 | mov $flags $r8 // restore the saved $flags | ||
397 | pop $r8 | ||
398 | bclr $flags $p0 // main() sleeps on $p0; clearing it lets the main loop run | ||
399 | iret | ||
400 | |||
401 | #ifdef NVGF | ||
402 | // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done | ||
403 | ctx_4160s: | ||
404 | mov $r14 0x4160 | ||
405 | sethi $r14 0x400000 // $r14 = 0x404160 | ||
406 | mov $r15 1 | ||
407 | call #nv_wr32 // 0x404160 = 1 | ||
408 | ctx_4160s_wait: | ||
409 | call #nv_rd32 // relies on $r14 still holding 0x404160 after the calls above | ||
410 | xbit $r15 $r15 4 | ||
411 | bra e #ctx_4160s_wait // spin until bit 4 reads back set | ||
412 | ret | ||
413 | |||
414 | // Without clearing again at end of xfer, some things cause PGRAPH | ||
415 | // to hang with STATUS=0x00000007 until it's cleared.. fbcon can | ||
416 | // still function with it set however... | ||
417 | ctx_4160c: | ||
418 | mov $r14 0x4160 | ||
419 | sethi $r14 0x400000 // $r14 = 0x404160 | ||
420 | clear b32 $r15 | ||
421 | call #nv_wr32 // 0x404160 = 0 | ||
422 | ret | ||
423 | #endif | ||
424 | |||
425 | // Again, not real sure: writes ($r15 | 0x10) to 0x404170 | ||
426 | // | ||
427 | // In: $r15 value to set 0x404170 to | ||
428 | // | ||
429 | ctx_4170s: | ||
430 | mov $r14 0x4170 | ||
431 | sethi $r14 0x400000 // $r14 = 0x404170 | ||
432 | or $r15 0x10 // bit 4 is always set; ctx_4170w polls it | ||
433 | call #nv_wr32 | ||
434 | ret | ||
435 | |||
436 | // Waits for a ctx_4170s() call to complete, by polling 0x404170 until bit 4 (0x10) reads back clear | ||
437 | // | ||
438 | ctx_4170w: | ||
439 | mov $r14 0x4170 | ||
440 | sethi $r14 0x400000 // $r14 = 0x404170 | ||
441 | call #nv_rd32 | ||
442 | and $r15 0x10 | ||
443 | bra ne #ctx_4170w // still set: reload the address and read again | ||
444 | ret | ||
445 | |||
446 | // Disables various things, waits a bit, and re-enables them.. | ||
447 | // | ||
448 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
449 | // good description for the bits we turn off? Anyways, without this, | ||
450 | // funny things happen. | ||
451 | // | ||
452 | ctx_redswitch: | ||
453 | mov $r14 0x614 | ||
454 | shl b32 $r14 6 | ||
455 | mov $r15 0x270 | ||
456 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL | ||
457 | mov $r15 8 // short busy-wait: 8 loop iterations | ||
458 | ctx_redswitch_delay: | ||
459 | sub b32 $r15 1 | ||
460 | bra ne #ctx_redswitch_delay | ||
461 | mov $r15 0x770 | ||
462 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL | ||
463 | ret | ||
464 | |||
465 | // Not a clue what this is for, except that unless the value is 0x10, the | ||
466 | // strand context is saved (and presumably restored) incorrectly.. | ||
467 | // | ||
468 | // In: $r15 value to set to (0x00/0x10 are used) | ||
469 | // | ||
470 | ctx_86c: | ||
471 | mov $r14 0x86c | ||
472 | shl b32 $r14 6 | ||
473 | iowr I[$r14] $r15 // HUB(0x86c) = val | ||
474 | mov $r14 -0x75ec // 0x8a14 | ||
475 | sethi $r14 0x400000 | ||
476 | call #nv_wr32 // ROP(0xa14) = val | ||
477 | mov $r14 -0x5794 // 0xa86c | ||
478 | sethi $r14 0x410000 | ||
479 | call #nv_wr32 // GPC(0x86c) = val | ||
480 | ret | ||
481 | |||
482 | // ctx_load - loads a channel's ctxctl data, and selects its vm | ||
483 | // | ||
484 | // In: $r2 channel address | ||
485 | // | ||
486 | ctx_load: | ||
487 | trace_set(T_CHAN) | ||
488 | |||
489 | // switch to channel, somewhat magic in parts.. | ||
490 | mov $r10 12 // DONE_UNK12 | ||
491 | call #wait_donez | ||
492 | mov $r1 0xa24 | ||
493 | shl b32 $r1 6 | ||
494 | iowr I[$r1 + 0x000] $r0 // 0x409a24 | ||
495 | mov $r3 0xb00 | ||
496 | shl b32 $r3 6 | ||
497 | iowr I[$r3 + 0x100] $r2 // CHAN_NEXT | ||
498 | mov $r1 0xa0c | ||
499 | shl b32 $r1 6 | ||
500 | mov $r4 7 | ||
501 | iowr I[$r1 + 0x000] $r2 // MEM_CHAN | ||
502 | iowr I[$r1 + 0x100] $r4 // MEM_CMD | ||
503 | ctx_chan_wait_0: | ||
504 | iord $r4 I[$r1 + 0x100] | ||
505 | and $r4 0x1f | ||
506 | bra ne #ctx_chan_wait_0 // spin until the low 5 bits of MEM_CMD read zero | ||
507 | iowr I[$r3 + 0x000] $r2 // CHAN_CUR | ||
508 | |||
509 | // load channel header, fetch PGRAPH context pointer | ||
510 | mov $xtargets $r0 | ||
511 | bclr $r2 31 // drop bit 31 of the channel value | ||
512 | shl b32 $r2 4 | ||
513 | add b32 $r2 2 // together with the 0x10 offset below this addresses chan + 0x0210 | ||
514 | |||
515 | trace_set(T_LCHAN) | ||
516 | mov $r1 0xa04 | ||
517 | shl b32 $r1 6 | ||
518 | iowr I[$r1 + 0x000] $r2 // MEM_BASE | ||
519 | mov $r1 0xa20 | ||
520 | shl b32 $r1 6 | ||
521 | mov $r2 0x0002 | ||
522 | sethi $r2 0x80000000 | ||
523 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram | ||
524 | mov $r1 0x10 // chan + 0x0210 | ||
525 | mov $r2 #xfer_data | ||
526 | sethi $r2 0x00020000 // 16 bytes | ||
527 | xdld $r1 $r2 | ||
528 | xdwait | ||
529 | trace_clr(T_LCHAN) | ||
530 | |||
531 | // update current context | ||
532 | ld b32 $r1 D[$r0 + #xfer_data + 4] | ||
533 | shl b32 $r1 24 // second word supplies the top 8 bits | ||
534 | ld b32 $r2 D[$r0 + #xfer_data + 0] | ||
535 | shr b32 $r2 8 // first word, shifted down by 8 | ||
536 | or $r1 $r2 | ||
537 | st b32 D[$r0 + #ctx_current] $r1 | ||
538 | |||
539 | // set transfer base to start of context, and fetch context header | ||
540 | trace_set(T_LCTXH) | ||
541 | mov $r2 0xa04 | ||
542 | shl b32 $r2 6 | ||
543 | iowr I[$r2 + 0x000] $r1 // MEM_BASE | ||
544 | mov $r2 1 | ||
545 | mov $r1 0xa20 | ||
546 | shl b32 $r1 6 | ||
547 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm | ||
548 | mov $r1 #chan_data | ||
549 | sethi $r1 0x00060000 // 256 bytes | ||
550 | xdld $r0 $r1 | ||
551 | xdwait | ||
552 | trace_clr(T_LCTXH) | ||
553 | |||
554 | trace_clr(T_CHAN) | ||
555 | ret | ||
556 | |||
557 | // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as | ||
558 | // the active channel for ctxctl, but not actually transfer | ||
559 | // any context data. intended for use only during initial | ||
560 | // context construction. | ||
561 | // | ||
562 | // In: $r2 channel address | ||
563 | // | ||
564 | ctx_chan: | ||
565 | #ifdef NVGF | ||
566 | call #ctx_4160s | ||
567 | #endif | ||
568 | call #ctx_load | ||
569 | mov $r10 12 // DONE_UNK12 | ||
570 | call #wait_donez | ||
571 | mov $r1 0xa10 | ||
572 | shl b32 $r1 6 | ||
573 | mov $r2 5 | ||
574 | iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) | ||
575 | ctx_chan_wait: | ||
576 | iord $r2 I[$r1 + 0x000] | ||
577 | or $r2 $r2 | ||
578 | bra ne #ctx_chan_wait | ||
579 | #ifdef NVGF | ||
580 | call #ctx_4160c | ||
581 | #endif | ||
582 | ret | ||
583 | |||
584 | // Execute per-context state overrides list | ||
585 | // | ||
586 | // Only executed on the first load of a channel. Might want to look into | ||
587 | // removing this and having the host directly modify the channel's context | ||
588 | // to change this state... The nouveau DRM already builds this list as | ||
589 | // it's definitely needed for NVIDIA's, so we may as well use it for now | ||
590 | // | ||
591 | // Input: $r1 mmio list length | ||
592 | // | ||
593 | ctx_mmio_exec: | ||
594 | // set transfer base to be the mmio list | ||
595 | ld b32 $r3 D[$r0 + #chan_mmio_address] | ||
596 | mov $r2 0xa04 | ||
597 | shl b32 $r2 6 | ||
598 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
599 | |||
600 | clear b32 $r3 | ||
601 | ctx_mmio_loop: | ||
602 | // fetch next 256 bytes of mmio list if necessary | ||
603 | and $r4 $r3 0xff | ||
604 | bra ne #ctx_mmio_pull | ||
605 | mov $r5 #xfer_data | ||
606 | sethi $r5 0x00060000 // 256 bytes | ||
607 | xdld $r3 $r5 | ||
608 | xdwait | ||
609 | |||
610 | // execute a single list entry | ||
611 | ctx_mmio_pull: | ||
612 | ld b32 $r14 D[$r4 + #xfer_data + 0x00] | ||
613 | ld b32 $r15 D[$r4 + #xfer_data + 0x04] | ||
614 | call #nv_wr32 | ||
615 | |||
616 | // next! | ||
617 | add b32 $r3 8 | ||
618 | sub b32 $r1 1 | ||
619 | bra ne #ctx_mmio_loop | ||
620 | |||
621 | // set transfer base back to the current context | ||
622 | ctx_mmio_done: | ||
623 | ld b32 $r3 D[$r0 + #ctx_current] | ||
624 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
625 | |||
626 | // disable the mmio list now, we don't need/want to execute it again | ||
627 | st b32 D[$r0 + #chan_mmio_count] $r0 | ||
628 | mov $r1 #chan_data | ||
629 | sethi $r1 0x00060000 // 256 bytes | ||
630 | xdst $r0 $r1 | ||
631 | xdwait | ||
632 | ret | ||
633 | |||
634 | // Transfer HUB context data between GPU and storage area | ||
635 | // | ||
636 | // In: $r2 channel address | ||
637 | // $p1 clear on save, set on load | ||
638 | // $p2 set if opposite direction done/will be done, so: | ||
639 | // on save it means: "a load will follow this save" | ||
640 | // on load it means: "a save preceded this load" | ||
641 | // | ||
642 | ctx_xfer: | ||
643 | // according to mwk, some kind of wait for idle | ||
644 | mov $r15 0xc00 | ||
645 | shl b32 $r15 6 | ||
646 | mov $r14 4 | ||
647 | iowr I[$r15 + 0x200] $r14 | ||
648 | ctx_xfer_idle: | ||
649 | iord $r14 I[$r15 + 0x000] | ||
650 | and $r14 0x2000 | ||
651 | bra ne #ctx_xfer_idle | ||
652 | |||
653 | bra not $p1 #ctx_xfer_pre | ||
654 | bra $p2 #ctx_xfer_pre_load | ||
655 | ctx_xfer_pre: | ||
656 | mov $r15 0x10 | ||
657 | call #ctx_86c | ||
658 | #ifdef NVGF | ||
659 | call #ctx_4160s | ||
660 | #endif | ||
661 | bra not $p1 #ctx_xfer_exec | ||
662 | |||
663 | ctx_xfer_pre_load: | ||
664 | mov $r15 2 | ||
665 | call #ctx_4170s | ||
666 | call #ctx_4170w | ||
667 | call #ctx_redswitch | ||
668 | clear b32 $r15 | ||
669 | call #ctx_4170s | ||
670 | call #ctx_load | ||
671 | |||
672 | // fetch context pointer, and initiate xfer on all GPCs | ||
673 | ctx_xfer_exec: | ||
674 | ld b32 $r1 D[$r0 + #ctx_current] | ||
675 | mov $r2 0x414 | ||
676 | shl b32 $r2 6 | ||
677 | iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset | ||
678 | mov $r14 -0x5b00 | ||
679 | sethi $r14 0x410000 | ||
680 | mov b32 $r15 $r1 | ||
681 | call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer | ||
682 | add b32 $r14 4 | ||
683 | xbit $r15 $flags $p1 | ||
684 | xbit $r2 $flags $p2 | ||
685 | shl b32 $r2 1 | ||
686 | or $r15 $r2 | ||
687 | call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) | ||
688 | |||
689 | // strands | ||
690 | mov $r1 0x4afc | ||
691 | sethi $r1 0x20000 | ||
692 | mov $r2 0xc | ||
693 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
694 | call #strand_wait | ||
695 | mov $r2 0x47fc | ||
696 | sethi $r2 0x20000 | ||
697 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
698 | xbit $r2 $flags $p1 | ||
699 | add b32 $r2 3 | ||
700 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
701 | |||
702 | // mmio context | ||
703 | xbit $r10 $flags $p1 // direction | ||
704 | or $r10 6 // first, last | ||
705 | mov $r11 0 // base = 0 | ||
706 | ld b32 $r12 D[$r0 + #hub_mmio_list_head] | ||
707 | ld b32 $r13 D[$r0 + #hub_mmio_list_tail] | ||
708 | mov $r14 0 // not multi | ||
709 | call #mmctx_xfer | ||
710 | |||
711 | // wait for GPCs to all complete | ||
712 | mov $r10 8 // DONE_BAR | ||
713 | call #wait_doneo | ||
714 | |||
715 | // wait for strand xfer to complete | ||
716 | call #strand_wait | ||
717 | |||
718 | // post-op | ||
719 | bra $p1 #ctx_xfer_post | ||
720 | mov $r10 12 // DONE_UNK12 | ||
721 | call #wait_donez | ||
722 | mov $r1 0xa10 | ||
723 | shl b32 $r1 6 | ||
724 | mov $r2 5 | ||
725 | iowr I[$r1] $r2 // MEM_CMD | ||
726 | ctx_xfer_post_save_wait: | ||
727 | iord $r2 I[$r1] | ||
728 | or $r2 $r2 | ||
729 | bra ne #ctx_xfer_post_save_wait | ||
730 | |||
731 | bra $p2 #ctx_xfer_done | ||
732 | ctx_xfer_post: | ||
733 | mov $r15 2 | ||
734 | call #ctx_4170s | ||
735 | clear b32 $r15 | ||
736 | call #ctx_86c | ||
737 | call #strand_post | ||
738 | call #ctx_4170w | ||
739 | clear b32 $r15 | ||
740 | call #ctx_4170s | ||
741 | |||
742 | bra not $p1 #ctx_xfer_no_post_mmio | ||
743 | ld b32 $r1 D[$r0 + #chan_mmio_count] | ||
744 | or $r1 $r1 | ||
745 | bra e #ctx_xfer_no_post_mmio | ||
746 | call #ctx_mmio_exec | ||
747 | |||
748 | ctx_xfer_no_post_mmio: | ||
749 | #ifdef NVGF | ||
750 | call #ctx_4160c | ||
751 | #endif | ||
752 | |||
753 | ctx_xfer_done: | ||
754 | ret | ||
755 | #endif | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc index 9f174be6bc82..f144f665b807 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc | |||
@@ -1,6 +1,5 @@ | |||
1 | /* fuc microcode for nvc0 PGRAPH/HUB | 1 | /* |
2 | * | 2 | * Copyright 2013 Red Hat Inc. |
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | 3 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -20,32 +19,17 @@ | |||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * | 21 | * |
23 | * Authors: Ben Skeggs | 22 | * Authors: Ben Skeggs <bskeggs@redhat.com> |
24 | */ | 23 | */ |
25 | 24 | ||
26 | /* To build: | 25 | #define NVGF |
27 | * m4 hubnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o hubnvc0.fuc.h | 26 | #include "macros.fuc" |
28 | */ | ||
29 | 27 | ||
30 | .section #nvc0_grhub_data | 28 | .section #nvc0_grhub_data |
31 | include(`nvc0.fuc') | 29 | #define INCLUDE_DATA |
32 | gpc_count: .b32 0 | 30 | #include "com.fuc" |
33 | rop_count: .b32 0 | 31 | #include "hub.fuc" |
34 | cmd_queue: queue_init | ||
35 | hub_mmio_list_head: .b32 0 | ||
36 | hub_mmio_list_tail: .b32 0 | ||
37 | |||
38 | ctx_current: .b32 0 | ||
39 | |||
40 | .align 256 | ||
41 | chan_data: | ||
42 | chan_mmio_count: .b32 0 | ||
43 | chan_mmio_address: .b32 0 | ||
44 | 32 | ||
45 | .align 256 | ||
46 | xfer_data: .b32 0 | ||
47 | |||
48 | .align 256 | ||
49 | chipsets: | 33 | chipsets: |
50 | .b8 0xc0 0 0 0 | 34 | .b8 0xc0 0 0 0 |
51 | .b16 #nvc0_hub_mmio_head | 35 | .b16 #nvc0_hub_mmio_head |
@@ -124,710 +108,12 @@ mmctx_data(0x4064c0, 2) | |||
124 | nvc1_hub_mmio_tail: | 108 | nvc1_hub_mmio_tail: |
125 | mmctx_data(0x4064bc, 3) | 109 | mmctx_data(0x4064bc, 3) |
126 | nvd9_hub_mmio_tail: | 110 | nvd9_hub_mmio_tail: |
111 | #undef INCLUDE_DATA | ||
127 | 112 | ||
128 | .section #nvc0_grhub_code | 113 | .section #nvc0_grhub_code |
114 | #define INCLUDE_CODE | ||
129 | bra #init | 115 | bra #init |
130 | define(`include_code') | 116 | #include "com.fuc" |
131 | include(`nvc0.fuc') | 117 | #include "hub.fuc" |
132 | |||
133 | // reports an exception to the host | ||
134 | // | ||
135 | // In: $r15 error code (see nvc0.fuc) | ||
136 | // | ||
137 | error: | ||
138 | push $r14 | ||
139 | mov $r14 0x814 | ||
140 | shl b32 $r14 6 | ||
141 | iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code | ||
142 | mov $r14 0xc1c | ||
143 | shl b32 $r14 6 | ||
144 | mov $r15 1 | ||
145 | iowr I[$r14 + 0x000] $r15 // INTR_UP_SET | ||
146 | pop $r14 | ||
147 | ret | ||
148 | |||
149 | // HUB fuc initialisation, executed by triggering ucode start, will | ||
150 | // fall through to main loop after completion. | ||
151 | // | ||
152 | // Input: | ||
153 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
154 | // | ||
155 | // Output: | ||
156 | // CC_SCRATCH[0]: | ||
157 | // 31:31: set to signal completion | ||
158 | // CC_SCRATCH[1]: | ||
159 | // 31:0: total PGRAPH context size | ||
160 | // | ||
161 | init: | ||
162 | clear b32 $r0 | ||
163 | mov $sp $r0 | ||
164 | mov $xdbase $r0 | ||
165 | |||
166 | // enable fifo access | ||
167 | mov $r1 0x1200 | ||
168 | mov $r2 2 | ||
169 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
170 | |||
171 | // setup i0 handler, and route all interrupts to it | ||
172 | mov $r1 #ih | ||
173 | mov $iv0 $r1 | ||
174 | mov $r1 0x400 | ||
175 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
176 | |||
177 | // route HUB_CHANNEL_SWITCH to fuc interrupt 8 | ||
178 | mov $r3 0x404 | ||
179 | shl b32 $r3 6 | ||
180 | mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 | ||
181 | iowr I[$r3 + 0x000] $r2 | ||
182 | |||
183 | // not sure what these are, route them because NVIDIA does, and | ||
184 | // the IRQ handler will signal the host if we ever get one.. we | ||
185 | // may find out if/why we need to handle these if so.. | ||
186 | // | ||
187 | mov $r2 0x2004 | ||
188 | iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 | ||
189 | mov $r2 0x200b | ||
190 | iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 | ||
191 | mov $r2 0x200c | ||
192 | iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 | ||
193 | |||
194 | // enable all INTR_UP interrupts | ||
195 | mov $r2 0xc24 | ||
196 | shl b32 $r2 6 | ||
197 | not b32 $r3 $r0 | ||
198 | iowr I[$r2] $r3 | ||
199 | |||
200 | // enable fifo, ctxsw, 9, 10, 15 interrupts | ||
201 | mov $r2 -0x78fc // 0x8704 | ||
202 | sethi $r2 0 | ||
203 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
204 | |||
205 | // fifo level triggered, rest edge | ||
206 | sub b32 $r1 0x100 | ||
207 | mov $r2 4 | ||
208 | iowr I[$r1] $r2 | ||
209 | |||
210 | // enable interrupts | ||
211 | bset $flags ie0 | ||
212 | |||
213 | // fetch enabled GPC/ROP counts | ||
214 | mov $r14 -0x69fc // 0x409604 | ||
215 | sethi $r14 0x400000 | ||
216 | call #nv_rd32 | ||
217 | extr $r1 $r15 16:20 | ||
218 | st b32 D[$r0 + #rop_count] $r1 | ||
219 | and $r15 0x1f | ||
220 | st b32 D[$r0 + #gpc_count] $r15 | ||
221 | |||
222 | // set BAR_REQMASK to GPC mask | ||
223 | mov $r1 1 | ||
224 | shl b32 $r1 $r15 | ||
225 | sub b32 $r1 1 | ||
226 | mov $r2 0x40c | ||
227 | shl b32 $r2 6 | ||
228 | iowr I[$r2 + 0x000] $r1 | ||
229 | iowr I[$r2 + 0x100] $r1 | ||
230 | |||
231 | // find context data for this chipset | ||
232 | mov $r2 0x800 | ||
233 | shl b32 $r2 6 | ||
234 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
235 | mov $r15 #chipsets - 8 | ||
236 | init_find_chipset: | ||
237 | add b32 $r15 8 | ||
238 | ld b32 $r3 D[$r15 + 0x00] | ||
239 | cmpu b32 $r3 $r2 | ||
240 | bra e #init_context | ||
241 | cmpu b32 $r3 0 | ||
242 | bra ne #init_find_chipset | ||
243 | // unknown chipset | ||
244 | ret | ||
245 | |||
246 | // context size calculation, reserve first 256 bytes for use by fuc | ||
247 | init_context: | ||
248 | mov $r1 256 | ||
249 | |||
250 | // calculate size of mmio context data | ||
251 | ld b16 $r14 D[$r15 + 4] | ||
252 | ld b16 $r15 D[$r15 + 6] | ||
253 | sethi $r14 0 | ||
254 | st b32 D[$r0 + #hub_mmio_list_head] $r14 | ||
255 | st b32 D[$r0 + #hub_mmio_list_tail] $r15 | ||
256 | call #mmctx_size | ||
257 | |||
258 | // set mmctx base addresses now so we don't have to do it later, | ||
259 | // they don't (currently) ever change | ||
260 | mov $r3 0x700 | ||
261 | shl b32 $r3 6 | ||
262 | shr b32 $r4 $r1 8 | ||
263 | iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE | ||
264 | iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE | ||
265 | add b32 $r3 0x1300 | ||
266 | add b32 $r1 $r15 | ||
267 | shr b32 $r15 2 | ||
268 | iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!? | ||
269 | |||
270 | // strands, base offset needs to be aligned to 256 bytes | ||
271 | shr b32 $r1 8 | ||
272 | add b32 $r1 1 | ||
273 | shl b32 $r1 8 | ||
274 | mov b32 $r15 $r1 | ||
275 | call #strand_ctx_init | ||
276 | add b32 $r1 $r15 | ||
277 | |||
278 | // initialise each GPC in sequence by passing in the offset of its | ||
279 | // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which | ||
280 | // has previously been uploaded by the host) running. | ||
281 | // | ||
282 | // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 | ||
283 | // when it has completed, and return the size of its context data | ||
284 | // in GPCn_CC_SCRATCH[1] | ||
285 | // | ||
286 | ld b32 $r3 D[$r0 + #gpc_count] | ||
287 | mov $r4 0x2000 | ||
288 | sethi $r4 0x500000 | ||
289 | init_gpc: | ||
290 | // setup, and start GPC ucode running | ||
291 | add b32 $r14 $r4 0x804 | ||
292 | mov b32 $r15 $r1 | ||
293 | call #nv_wr32 // CC_SCRATCH[1] = ctx offset | ||
294 | add b32 $r14 $r4 0x800 | ||
295 | mov b32 $r15 $r2 | ||
296 | call #nv_wr32 // CC_SCRATCH[0] = chipset | ||
297 | add b32 $r14 $r4 0x10c | ||
298 | clear b32 $r15 | ||
299 | call #nv_wr32 | ||
300 | add b32 $r14 $r4 0x104 | ||
301 | call #nv_wr32 // ENTRY | ||
302 | add b32 $r14 $r4 0x100 | ||
303 | mov $r15 2 // CTRL_START_TRIGGER | ||
304 | call #nv_wr32 // CTRL | ||
305 | |||
306 | // wait for it to complete, and adjust context size | ||
307 | add b32 $r14 $r4 0x800 | ||
308 | init_gpc_wait: | ||
309 | call #nv_rd32 | ||
310 | xbit $r15 $r15 31 | ||
311 | bra e #init_gpc_wait | ||
312 | add b32 $r14 $r4 0x804 | ||
313 | call #nv_rd32 | ||
314 | add b32 $r1 $r15 | ||
315 | |||
316 | // next! | ||
317 | add b32 $r4 0x8000 | ||
318 | sub b32 $r3 1 | ||
319 | bra ne #init_gpc | ||
320 | |||
321 | // save context size, and tell host we're ready | ||
322 | mov $r2 0x800 | ||
323 | shl b32 $r2 6 | ||
324 | iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size | ||
325 | add b32 $r2 0x800 | ||
326 | clear b32 $r1 | ||
327 | bset $r1 31 | ||
328 | iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 | ||
329 | |||
330 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
331 | // handler, pulls a command from the queue and executes its handler | ||
332 | // | ||
333 | main: | ||
334 | // sleep until we have something to do | ||
335 | bset $flags $p0 | ||
336 | sleep $p0 | ||
337 | mov $r13 #cmd_queue | ||
338 | call #queue_get | ||
339 | bra $p1 #main | ||
340 | |||
341 | // context switch, requested by GPU? | ||
342 | cmpu b32 $r14 0x4001 | ||
343 | bra ne #main_not_ctx_switch | ||
344 | trace_set(T_AUTO) | ||
345 | mov $r1 0xb00 | ||
346 | shl b32 $r1 6 | ||
347 | iord $r2 I[$r1 + 0x100] // CHAN_NEXT | ||
348 | iord $r1 I[$r1 + 0x000] // CHAN_CUR | ||
349 | |||
350 | xbit $r3 $r1 31 | ||
351 | bra e #chsw_no_prev | ||
352 | xbit $r3 $r2 31 | ||
353 | bra e #chsw_prev_no_next | ||
354 | push $r2 | ||
355 | mov b32 $r2 $r1 | ||
356 | trace_set(T_SAVE) | ||
357 | bclr $flags $p1 | ||
358 | bset $flags $p2 | ||
359 | call #ctx_xfer | ||
360 | trace_clr(T_SAVE); | ||
361 | pop $r2 | ||
362 | trace_set(T_LOAD); | ||
363 | bset $flags $p1 | ||
364 | call #ctx_xfer | ||
365 | trace_clr(T_LOAD); | ||
366 | bra #chsw_done | ||
367 | chsw_prev_no_next: | ||
368 | push $r2 | ||
369 | mov b32 $r2 $r1 | ||
370 | bclr $flags $p1 | ||
371 | bclr $flags $p2 | ||
372 | call #ctx_xfer | ||
373 | pop $r2 | ||
374 | mov $r1 0xb00 | ||
375 | shl b32 $r1 6 | ||
376 | iowr I[$r1] $r2 | ||
377 | bra #chsw_done | ||
378 | chsw_no_prev: | ||
379 | xbit $r3 $r2 31 | ||
380 | bra e #chsw_done | ||
381 | bset $flags $p1 | ||
382 | bclr $flags $p2 | ||
383 | call #ctx_xfer | ||
384 | |||
385 | // ack the context switch request | ||
386 | chsw_done: | ||
387 | mov $r1 0xb0c | ||
388 | shl b32 $r1 6 | ||
389 | mov $r2 1 | ||
390 | iowr I[$r1 + 0x000] $r2 // 0x409b0c | ||
391 | trace_clr(T_AUTO) | ||
392 | bra #main | ||
393 | |||
394 | // request to set current channel? (*not* a context switch) | ||
395 | main_not_ctx_switch: | ||
396 | cmpu b32 $r14 0x0001 | ||
397 | bra ne #main_not_ctx_chan | ||
398 | mov b32 $r2 $r15 | ||
399 | call #ctx_chan | ||
400 | bra #main_done | ||
401 | |||
402 | // request to store current channel context? | ||
403 | main_not_ctx_chan: | ||
404 | cmpu b32 $r14 0x0002 | ||
405 | bra ne #main_not_ctx_save | ||
406 | trace_set(T_SAVE) | ||
407 | bclr $flags $p1 | ||
408 | bclr $flags $p2 | ||
409 | call #ctx_xfer | ||
410 | trace_clr(T_SAVE) | ||
411 | bra #main_done | ||
412 | |||
413 | main_not_ctx_save: | ||
414 | shl b32 $r15 $r14 16 | ||
415 | or $r15 E_BAD_COMMAND | ||
416 | call #error | ||
417 | bra #main | ||
418 | |||
419 | main_done: | ||
420 | mov $r1 0x820 | ||
421 | shl b32 $r1 6 | ||
422 | clear b32 $r2 | ||
423 | bset $r2 31 | ||
424 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
425 | bra #main | ||
426 | |||
427 | // interrupt handler | ||
428 | ih: | ||
429 | push $r8 | ||
430 | mov $r8 $flags | ||
431 | push $r8 | ||
432 | push $r9 | ||
433 | push $r10 | ||
434 | push $r11 | ||
435 | push $r13 | ||
436 | push $r14 | ||
437 | push $r15 | ||
438 | |||
439 | // incoming fifo command? | ||
440 | iord $r10 I[$r0 + 0x200] // INTR | ||
441 | and $r11 $r10 0x00000004 | ||
442 | bra e #ih_no_fifo | ||
443 | // queue incoming fifo command for later processing | ||
444 | mov $r11 0x1900 | ||
445 | mov $r13 #cmd_queue | ||
446 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
447 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
448 | call #queue_put | ||
449 | add b32 $r11 0x400 | ||
450 | mov $r14 1 | ||
451 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
452 | |||
453 | // context switch request? | ||
454 | ih_no_fifo: | ||
455 | and $r11 $r10 0x00000100 | ||
456 | bra e #ih_no_ctxsw | ||
457 | // enqueue a context switch for later processing | ||
458 | mov $r13 #cmd_queue | ||
459 | mov $r14 0x4001 | ||
460 | call #queue_put | ||
461 | |||
462 | // anything we didn't handle, bring it to the host's attention | ||
463 | ih_no_ctxsw: | ||
464 | mov $r11 0x104 | ||
465 | not b32 $r11 | ||
466 | and $r11 $r10 $r11 | ||
467 | bra e #ih_no_other | ||
468 | mov $r10 0xc1c | ||
469 | shl b32 $r10 6 | ||
470 | iowr I[$r10] $r11 // INTR_UP_SET | ||
471 | |||
472 | // ack, and wake up main() | ||
473 | ih_no_other: | ||
474 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
475 | |||
476 | pop $r15 | ||
477 | pop $r14 | ||
478 | pop $r13 | ||
479 | pop $r11 | ||
480 | pop $r10 | ||
481 | pop $r9 | ||
482 | pop $r8 | ||
483 | mov $flags $r8 | ||
484 | pop $r8 | ||
485 | bclr $flags $p0 | ||
486 | iret | ||
487 | |||
488 | // Not real sure, but, MEM_CMD 7 will hang forever if this isn't done | ||
489 | ctx_4160s: | ||
490 | mov $r14 0x4160 | ||
491 | sethi $r14 0x400000 | ||
492 | mov $r15 1 | ||
493 | call #nv_wr32 | ||
494 | ctx_4160s_wait: | ||
495 | call #nv_rd32 | ||
496 | xbit $r15 $r15 4 | ||
497 | bra e #ctx_4160s_wait | ||
498 | ret | ||
499 | |||
500 | // Without clearing again at end of xfer, some things cause PGRAPH | ||
501 | // to hang with STATUS=0x00000007 until it's cleared.. fbcon can | ||
502 | // still function with it set however... | ||
503 | ctx_4160c: | ||
504 | mov $r14 0x4160 | ||
505 | sethi $r14 0x400000 | ||
506 | clear b32 $r15 | ||
507 | call #nv_wr32 | ||
508 | ret | ||
509 | |||
510 | // Again, not real sure | ||
511 | // | ||
512 | // In: $r15 value to set 0x404170 to | ||
513 | // | ||
514 | ctx_4170s: | ||
515 | mov $r14 0x4170 | ||
516 | sethi $r14 0x400000 | ||
517 | or $r15 0x10 | ||
518 | call #nv_wr32 | ||
519 | ret | ||
520 | |||
521 | // Waits for a ctx_4170s() call to complete | ||
522 | // | ||
523 | ctx_4170w: | ||
524 | mov $r14 0x4170 | ||
525 | sethi $r14 0x400000 | ||
526 | call #nv_rd32 | ||
527 | and $r15 0x10 | ||
528 | bra ne #ctx_4170w | ||
529 | ret | ||
530 | |||
531 | // Disables various things, waits a bit, and re-enables them.. | ||
532 | // | ||
533 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
534 | // good description for the bits we turn off? Anyways, without this, | ||
535 | // funny things happen. | ||
536 | // | ||
537 | ctx_redswitch: | ||
538 | mov $r14 0x614 | ||
539 | shl b32 $r14 6 | ||
540 | mov $r15 0x270 | ||
541 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL | ||
542 | mov $r15 8 | ||
543 | ctx_redswitch_delay: | ||
544 | sub b32 $r15 1 | ||
545 | bra ne #ctx_redswitch_delay | ||
546 | mov $r15 0x770 | ||
547 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL | ||
548 | ret | ||
549 | |||
550 | // Not a clue what this is for, except that unless the value is 0x10, the | ||
551 | // strand context is saved (and presumably restored) incorrectly.. | ||
552 | // | ||
553 | // In: $r15 value to set to (0x00/0x10 are used) | ||
554 | // | ||
555 | ctx_86c: | ||
556 | mov $r14 0x86c | ||
557 | shl b32 $r14 6 | ||
558 | iowr I[$r14] $r15 // HUB(0x86c) = val | ||
559 | mov $r14 -0x75ec | ||
560 | sethi $r14 0x400000 | ||
561 | call #nv_wr32 // ROP(0xa14) = val | ||
562 | mov $r14 -0x5794 | ||
563 | sethi $r14 0x410000 | ||
564 | call #nv_wr32 // GPC(0x86c) = val | ||
565 | ret | ||
566 | |||
567 | // ctx_load - loads a channel's ctxctl data, and selects its vm | ||
568 | // | ||
569 | // In: $r2 channel address | ||
570 | // | ||
571 | ctx_load: | ||
572 | trace_set(T_CHAN) | ||
573 | |||
574 | // switch to channel, somewhat magic in parts.. | ||
575 | mov $r10 12 // DONE_UNK12 | ||
576 | call #wait_donez | ||
577 | mov $r1 0xa24 | ||
578 | shl b32 $r1 6 | ||
579 | iowr I[$r1 + 0x000] $r0 // 0x409a24 | ||
580 | mov $r3 0xb00 | ||
581 | shl b32 $r3 6 | ||
582 | iowr I[$r3 + 0x100] $r2 // CHAN_NEXT | ||
583 | mov $r1 0xa0c | ||
584 | shl b32 $r1 6 | ||
585 | mov $r4 7 | ||
586 | iowr I[$r1 + 0x000] $r2 // MEM_CHAN | ||
587 | iowr I[$r1 + 0x100] $r4 // MEM_CMD | ||
588 | ctx_chan_wait_0: | ||
589 | iord $r4 I[$r1 + 0x100] | ||
590 | and $r4 0x1f | ||
591 | bra ne #ctx_chan_wait_0 | ||
592 | iowr I[$r3 + 0x000] $r2 // CHAN_CUR | ||
593 | |||
594 | // load channel header, fetch PGRAPH context pointer | ||
595 | mov $xtargets $r0 | ||
596 | bclr $r2 31 | ||
597 | shl b32 $r2 4 | ||
598 | add b32 $r2 2 | ||
599 | |||
600 | trace_set(T_LCHAN) | ||
601 | mov $r1 0xa04 | ||
602 | shl b32 $r1 6 | ||
603 | iowr I[$r1 + 0x000] $r2 // MEM_BASE | ||
604 | mov $r1 0xa20 | ||
605 | shl b32 $r1 6 | ||
606 | mov $r2 0x0002 | ||
607 | sethi $r2 0x80000000 | ||
608 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram | ||
609 | mov $r1 0x10 // chan + 0x0210 | ||
610 | mov $r2 #xfer_data | ||
611 | sethi $r2 0x00020000 // 16 bytes | ||
612 | xdld $r1 $r2 | ||
613 | xdwait | ||
614 | trace_clr(T_LCHAN) | ||
615 | |||
616 | // update current context | ||
617 | ld b32 $r1 D[$r0 + #xfer_data + 4] | ||
618 | shl b32 $r1 24 | ||
619 | ld b32 $r2 D[$r0 + #xfer_data + 0] | ||
620 | shr b32 $r2 8 | ||
621 | or $r1 $r2 | ||
622 | st b32 D[$r0 + #ctx_current] $r1 | ||
623 | |||
624 | // set transfer base to start of context, and fetch context header | ||
625 | trace_set(T_LCTXH) | ||
626 | mov $r2 0xa04 | ||
627 | shl b32 $r2 6 | ||
628 | iowr I[$r2 + 0x000] $r1 // MEM_BASE | ||
629 | mov $r2 1 | ||
630 | mov $r1 0xa20 | ||
631 | shl b32 $r1 6 | ||
632 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm | ||
633 | mov $r1 #chan_data | ||
634 | sethi $r1 0x00060000 // 256 bytes | ||
635 | xdld $r0 $r1 | ||
636 | xdwait | ||
637 | trace_clr(T_LCTXH) | ||
638 | |||
639 | trace_clr(T_CHAN) | ||
640 | ret | ||
641 | |||
642 | // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as | ||
643 | // the active channel for ctxctl, but not actually transfer | ||
644 | // any context data. intended for use only during initial | ||
645 | // context construction. | ||
646 | // | ||
647 | // In: $r2 channel address | ||
648 | // | ||
649 | ctx_chan: | ||
650 | call #ctx_4160s | ||
651 | call #ctx_load | ||
652 | mov $r10 12 // DONE_UNK12 | ||
653 | call #wait_donez | ||
654 | mov $r1 0xa10 | ||
655 | shl b32 $r1 6 | ||
656 | mov $r2 5 | ||
657 | iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) | ||
658 | ctx_chan_wait: | ||
659 | iord $r2 I[$r1 + 0x000] | ||
660 | or $r2 $r2 | ||
661 | bra ne #ctx_chan_wait | ||
662 | call #ctx_4160c | ||
663 | ret | ||
664 | |||
665 | // Execute per-context state overrides list | ||
666 | // | ||
667 | // Only executed on the first load of a channel. Might want to look into | ||
668 | // removing this and having the host directly modify the channel's context | ||
669 | // to change this state... The nouveau DRM already builds this list as | ||
670 | // it's definitely needed for NVIDIA's, so we may as well use it for now | ||
671 | // | ||
672 | // Input: $r1 mmio list length | ||
673 | // | ||
674 | ctx_mmio_exec: | ||
675 | // set transfer base to be the mmio list | ||
676 | ld b32 $r3 D[$r0 + #chan_mmio_address] | ||
677 | mov $r2 0xa04 | ||
678 | shl b32 $r2 6 | ||
679 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
680 | |||
681 | clear b32 $r3 | ||
682 | ctx_mmio_loop: | ||
683 | // fetch next 256 bytes of mmio list if necessary | ||
684 | and $r4 $r3 0xff | ||
685 | bra ne #ctx_mmio_pull | ||
686 | mov $r5 #xfer_data | ||
687 | sethi $r5 0x00060000 // 256 bytes | ||
688 | xdld $r3 $r5 | ||
689 | xdwait | ||
690 | |||
691 | // execute a single list entry | ||
692 | ctx_mmio_pull: | ||
693 | ld b32 $r14 D[$r4 + #xfer_data + 0x00] | ||
694 | ld b32 $r15 D[$r4 + #xfer_data + 0x04] | ||
695 | call #nv_wr32 | ||
696 | |||
697 | // next! | ||
698 | add b32 $r3 8 | ||
699 | sub b32 $r1 1 | ||
700 | bra ne #ctx_mmio_loop | ||
701 | |||
702 | // set transfer base back to the current context | ||
703 | ctx_mmio_done: | ||
704 | ld b32 $r3 D[$r0 + #ctx_current] | ||
705 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
706 | |||
707 | // disable the mmio list now, we don't need/want to execute it again | ||
708 | st b32 D[$r0 + #chan_mmio_count] $r0 | ||
709 | mov $r1 #chan_data | ||
710 | sethi $r1 0x00060000 // 256 bytes | ||
711 | xdst $r0 $r1 | ||
712 | xdwait | ||
713 | ret | ||
714 | |||
715 | // Transfer HUB context data between GPU and storage area | ||
716 | // | ||
717 | // In: $r2 channel address | ||
718 | // $p1 clear on save, set on load | ||
719 | // $p2 set if opposite direction done/will be done, so: | ||
720 | // on save it means: "a load will follow this save" | ||
721 | // on load it means: "a save preceded this load" | ||
722 | // | ||
723 | ctx_xfer: | ||
724 | // according to mwk, some kind of wait for idle | ||
725 | mov $r15 0xc00 | ||
726 | shl b32 $r15 6 | ||
727 | mov $r14 4 | ||
728 | iowr I[$r15 + 0x200] $r14 | ||
729 | ctx_xfer_idle: | ||
730 | iord $r14 I[$r15 + 0x000] | ||
731 | and $r14 0x2000 | ||
732 | bra ne #ctx_xfer_idle | ||
733 | |||
734 | bra not $p1 #ctx_xfer_pre | ||
735 | bra $p2 #ctx_xfer_pre_load | ||
736 | ctx_xfer_pre: | ||
737 | mov $r15 0x10 | ||
738 | call #ctx_86c | ||
739 | call #ctx_4160s | ||
740 | bra not $p1 #ctx_xfer_exec | ||
741 | |||
742 | ctx_xfer_pre_load: | ||
743 | mov $r15 2 | ||
744 | call #ctx_4170s | ||
745 | call #ctx_4170w | ||
746 | call #ctx_redswitch | ||
747 | clear b32 $r15 | ||
748 | call #ctx_4170s | ||
749 | call #ctx_load | ||
750 | |||
751 | // fetch context pointer, and initiate xfer on all GPCs | ||
752 | ctx_xfer_exec: | ||
753 | ld b32 $r1 D[$r0 + #ctx_current] | ||
754 | mov $r2 0x414 | ||
755 | shl b32 $r2 6 | ||
756 | iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset | ||
757 | mov $r14 -0x5b00 | ||
758 | sethi $r14 0x410000 | ||
759 | mov b32 $r15 $r1 | ||
760 | call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer | ||
761 | add b32 $r14 4 | ||
762 | xbit $r15 $flags $p1 | ||
763 | xbit $r2 $flags $p2 | ||
764 | shl b32 $r2 1 | ||
765 | or $r15 $r2 | ||
766 | call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) | ||
767 | |||
768 | // strands | ||
769 | mov $r1 0x4afc | ||
770 | sethi $r1 0x20000 | ||
771 | mov $r2 0xc | ||
772 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
773 | call #strand_wait | ||
774 | mov $r2 0x47fc | ||
775 | sethi $r2 0x20000 | ||
776 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
777 | xbit $r2 $flags $p1 | ||
778 | add b32 $r2 3 | ||
779 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
780 | |||
781 | // mmio context | ||
782 | xbit $r10 $flags $p1 // direction | ||
783 | or $r10 6 // first, last | ||
784 | mov $r11 0 // base = 0 | ||
785 | ld b32 $r12 D[$r0 + #hub_mmio_list_head] | ||
786 | ld b32 $r13 D[$r0 + #hub_mmio_list_tail] | ||
787 | mov $r14 0 // not multi | ||
788 | call #mmctx_xfer | ||
789 | |||
790 | // wait for GPCs to all complete | ||
791 | mov $r10 8 // DONE_BAR | ||
792 | call #wait_doneo | ||
793 | |||
794 | // wait for strand xfer to complete | ||
795 | call #strand_wait | ||
796 | |||
797 | // post-op | ||
798 | bra $p1 #ctx_xfer_post | ||
799 | mov $r10 12 // DONE_UNK12 | ||
800 | call #wait_donez | ||
801 | mov $r1 0xa10 | ||
802 | shl b32 $r1 6 | ||
803 | mov $r2 5 | ||
804 | iowr I[$r1] $r2 // MEM_CMD | ||
805 | ctx_xfer_post_save_wait: | ||
806 | iord $r2 I[$r1] | ||
807 | or $r2 $r2 | ||
808 | bra ne #ctx_xfer_post_save_wait | ||
809 | |||
810 | bra $p2 #ctx_xfer_done | ||
811 | ctx_xfer_post: | ||
812 | mov $r15 2 | ||
813 | call #ctx_4170s | ||
814 | clear b32 $r15 | ||
815 | call #ctx_86c | ||
816 | call #strand_post | ||
817 | call #ctx_4170w | ||
818 | clear b32 $r15 | ||
819 | call #ctx_4170s | ||
820 | |||
821 | bra not $p1 #ctx_xfer_no_post_mmio | ||
822 | ld b32 $r1 D[$r0 + #chan_mmio_count] | ||
823 | or $r1 $r1 | ||
824 | bra e #ctx_xfer_no_post_mmio | ||
825 | call #ctx_mmio_exec | ||
826 | |||
827 | ctx_xfer_no_post_mmio: | ||
828 | call #ctx_4160c | ||
829 | |||
830 | ctx_xfer_done: | ||
831 | ret | ||
832 | |||
833 | .align 256 | 118 | .align 256 |
119 | #undef INCLUDE_CODE | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h index 0953c2db2d13..d1bf23001830 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h | |||
@@ -338,7 +338,7 @@ uint32_t nvc0_grhub_code[] = { | |||
338 | 0x0089d000, | 338 | 0x0089d000, |
339 | 0x081887f1, | 339 | 0x081887f1, |
340 | 0xd00684b6, | 340 | 0xd00684b6, |
341 | /* 0x00e2: wait_done_wait_donez */ | 341 | /* 0x00e2: wait_donez_ne */ |
342 | 0x87f1008a, | 342 | 0x87f1008a, |
343 | 0x84b60400, | 343 | 0x84b60400, |
344 | 0x0088cf06, | 344 | 0x0088cf06, |
@@ -355,7 +355,7 @@ uint32_t nvc0_grhub_code[] = { | |||
355 | 0x87f10089, | 355 | 0x87f10089, |
356 | 0x84b60818, | 356 | 0x84b60818, |
357 | 0x008ad006, | 357 | 0x008ad006, |
358 | /* 0x011c: wait_done_wait_doneo */ | 358 | /* 0x011c: wait_doneo_e */ |
359 | 0x040087f1, | 359 | 0x040087f1, |
360 | 0xcf0684b6, | 360 | 0xcf0684b6, |
361 | 0x8aff0088, | 361 | 0x8aff0088, |
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc index b57a3db8df71..c7225db6486c 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc | |||
@@ -1,6 +1,5 @@ | |||
1 | /* fuc microcode for nve0 PGRAPH/HUB | 1 | /* |
2 | * | 2 | * Copyright 2013 Red Hat Inc. |
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | 3 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -20,32 +19,17 @@ | |||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. | 20 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * | 21 | * |
23 | * Authors: Ben Skeggs | 22 | * Authors: Ben Skeggs <bskeggs@redhat.com> |
24 | */ | 23 | */ |
25 | 24 | ||
26 | /* To build: | 25 | #define NVGK |
27 | * m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h | 26 | #include "macros.fuc" |
28 | */ | ||
29 | 27 | ||
30 | .section #nve0_grhub_data | 28 | .section #nve0_grhub_data |
31 | include(`nve0.fuc') | 29 | #define INCLUDE_DATA |
32 | gpc_count: .b32 0 | 30 | #include "com.fuc" |
33 | rop_count: .b32 0 | 31 | #include "hub.fuc" |
34 | cmd_queue: queue_init | ||
35 | hub_mmio_list_head: .b32 0 | ||
36 | hub_mmio_list_tail: .b32 0 | ||
37 | |||
38 | ctx_current: .b32 0 | ||
39 | |||
40 | .align 256 | ||
41 | chan_data: | ||
42 | chan_mmio_count: .b32 0 | ||
43 | chan_mmio_address: .b32 0 | ||
44 | 32 | ||
45 | .align 256 | ||
46 | xfer_data: .b32 0 | ||
47 | |||
48 | .align 256 | ||
49 | chipsets: | 33 | chipsets: |
50 | .b8 0xe4 0 0 0 | 34 | .b8 0xe4 0 0 0 |
51 | .b16 #nve4_hub_mmio_head | 35 | .b16 #nve4_hub_mmio_head |
@@ -170,684 +154,12 @@ mmctx_data(0x408840, 1) | |||
170 | mmctx_data(0x408900, 3) | 154 | mmctx_data(0x408900, 3) |
171 | mmctx_data(0x408980, 1) | 155 | mmctx_data(0x408980, 1) |
172 | nvf0_hub_mmio_tail: | 156 | nvf0_hub_mmio_tail: |
157 | #undef INCLUDE_DATA | ||
173 | 158 | ||
174 | .section #nve0_grhub_code | 159 | .section #nve0_grhub_code |
160 | #define INCLUDE_CODE | ||
175 | bra #init | 161 | bra #init |
176 | define(`include_code') | 162 | #include "com.fuc" |
177 | include(`nve0.fuc') | 163 | #include "hub.fuc" |
178 | |||
179 | // reports an exception to the host | ||
180 | // | ||
181 | // In: $r15 error code (see nve0.fuc) | ||
182 | // | ||
183 | error: | ||
184 | push $r14 | ||
185 | mov $r14 0x814 | ||
186 | shl b32 $r14 6 | ||
187 | iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code | ||
188 | mov $r14 0xc1c | ||
189 | shl b32 $r14 6 | ||
190 | mov $r15 1 | ||
191 | iowr I[$r14 + 0x000] $r15 // INTR_UP_SET | ||
192 | pop $r14 | ||
193 | ret | ||
194 | |||
195 | // HUB fuc initialisation, executed by triggering ucode start, will | ||
196 | // fall through to main loop after completion. | ||
197 | // | ||
198 | // Input: | ||
199 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | ||
200 | // | ||
201 | // Output: | ||
202 | // CC_SCRATCH[0]: | ||
203 | // 31:31: set to signal completion | ||
204 | // CC_SCRATCH[1]: | ||
205 | // 31:0: total PGRAPH context size | ||
206 | // | ||
207 | init: | ||
208 | clear b32 $r0 | ||
209 | mov $sp $r0 | ||
210 | mov $xdbase $r0 | ||
211 | |||
212 | // enable fifo access | ||
213 | mov $r1 0x1200 | ||
214 | mov $r2 2 | ||
215 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | ||
216 | |||
217 | // setup i0 handler, and route all interrupts to it | ||
218 | mov $r1 #ih | ||
219 | mov $iv0 $r1 | ||
220 | mov $r1 0x400 | ||
221 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH | ||
222 | |||
223 | // route HUB_CHANNEL_SWITCH to fuc interrupt 8 | ||
224 | mov $r3 0x404 | ||
225 | shl b32 $r3 6 | ||
226 | mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 | ||
227 | iowr I[$r3 + 0x000] $r2 | ||
228 | |||
229 | // not sure what these are, route them because NVIDIA does, and | ||
230 | // the IRQ handler will signal the host if we ever get one.. we | ||
231 | // may find out if/why we need to handle these if so.. | ||
232 | // | ||
233 | mov $r2 0x2004 | ||
234 | iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 | ||
235 | mov $r2 0x200b | ||
236 | iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 | ||
237 | mov $r2 0x200c | ||
238 | iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 | ||
239 | |||
240 | // enable all INTR_UP interrupts | ||
241 | mov $r2 0xc24 | ||
242 | shl b32 $r2 6 | ||
243 | not b32 $r3 $r0 | ||
244 | iowr I[$r2] $r3 | ||
245 | |||
246 | // enable fifo, ctxsw, 9, 10, 15 interrupts | ||
247 | mov $r2 -0x78fc // 0x8704 | ||
248 | sethi $r2 0 | ||
249 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | ||
250 | |||
251 | // fifo level triggered, rest edge | ||
252 | sub b32 $r1 0x100 | ||
253 | mov $r2 4 | ||
254 | iowr I[$r1] $r2 | ||
255 | |||
256 | // enable interrupts | ||
257 | bset $flags ie0 | ||
258 | |||
259 | // fetch enabled GPC/ROP counts | ||
260 | mov $r14 -0x69fc // 0x409604 | ||
261 | sethi $r14 0x400000 | ||
262 | call #nv_rd32 | ||
263 | extr $r1 $r15 16:20 | ||
264 | st b32 D[$r0 + #rop_count] $r1 | ||
265 | and $r15 0x1f | ||
266 | st b32 D[$r0 + #gpc_count] $r15 | ||
267 | |||
268 | // set BAR_REQMASK to GPC mask | ||
269 | mov $r1 1 | ||
270 | shl b32 $r1 $r15 | ||
271 | sub b32 $r1 1 | ||
272 | mov $r2 0x40c | ||
273 | shl b32 $r2 6 | ||
274 | iowr I[$r2 + 0x000] $r1 | ||
275 | iowr I[$r2 + 0x100] $r1 | ||
276 | |||
277 | // find context data for this chipset | ||
278 | mov $r2 0x800 | ||
279 | shl b32 $r2 6 | ||
280 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | ||
281 | mov $r15 #chipsets - 8 | ||
282 | init_find_chipset: | ||
283 | add b32 $r15 8 | ||
284 | ld b32 $r3 D[$r15 + 0x00] | ||
285 | cmpu b32 $r3 $r2 | ||
286 | bra e #init_context | ||
287 | cmpu b32 $r3 0 | ||
288 | bra ne #init_find_chipset | ||
289 | // unknown chipset | ||
290 | ret | ||
291 | |||
292 | // context size calculation, reserve first 256 bytes for use by fuc | ||
293 | init_context: | ||
294 | mov $r1 256 | ||
295 | |||
296 | // calculate size of mmio context data | ||
297 | ld b16 $r14 D[$r15 + 4] | ||
298 | ld b16 $r15 D[$r15 + 6] | ||
299 | sethi $r14 0 | ||
300 | st b32 D[$r0 + #hub_mmio_list_head] $r14 | ||
301 | st b32 D[$r0 + #hub_mmio_list_tail] $r15 | ||
302 | call #mmctx_size | ||
303 | |||
304 | // set mmctx base addresses now so we don't have to do it later, | ||
305 | // they don't (currently) ever change | ||
306 | mov $r3 0x700 | ||
307 | shl b32 $r3 6 | ||
308 | shr b32 $r4 $r1 8 | ||
309 | iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE | ||
310 | iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE | ||
311 | add b32 $r3 0x1300 | ||
312 | add b32 $r1 $r15 | ||
313 | shr b32 $r15 2 | ||
314 | iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!? | ||
315 | |||
316 | // strands, base offset needs to be aligned to 256 bytes | ||
317 | shr b32 $r1 8 | ||
318 | add b32 $r1 1 | ||
319 | shl b32 $r1 8 | ||
320 | mov b32 $r15 $r1 | ||
321 | call #strand_ctx_init | ||
322 | add b32 $r1 $r15 | ||
323 | |||
324 | // initialise each GPC in sequence by passing in the offset of its | ||
325 | // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which | ||
326 | // has previously been uploaded by the host) running. | ||
327 | // | ||
328 | // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 | ||
329 | // when it has completed, and return the size of its context data | ||
330 | // in GPCn_CC_SCRATCH[1] | ||
331 | // | ||
332 | ld b32 $r3 D[$r0 + #gpc_count] | ||
333 | mov $r4 0x2000 | ||
334 | sethi $r4 0x500000 | ||
335 | init_gpc: | ||
336 | // setup, and start GPC ucode running | ||
337 | add b32 $r14 $r4 0x804 | ||
338 | mov b32 $r15 $r1 | ||
339 | call #nv_wr32 // CC_SCRATCH[1] = ctx offset | ||
340 | add b32 $r14 $r4 0x800 | ||
341 | mov b32 $r15 $r2 | ||
342 | call #nv_wr32 // CC_SCRATCH[0] = chipset | ||
343 | add b32 $r14 $r4 0x10c | ||
344 | clear b32 $r15 | ||
345 | call #nv_wr32 | ||
346 | add b32 $r14 $r4 0x104 | ||
347 | call #nv_wr32 // ENTRY | ||
348 | add b32 $r14 $r4 0x100 | ||
349 | mov $r15 2 // CTRL_START_TRIGGER | ||
350 | call #nv_wr32 // CTRL | ||
351 | |||
352 | // wait for it to complete, and adjust context size | ||
353 | add b32 $r14 $r4 0x800 | ||
354 | init_gpc_wait: | ||
355 | call #nv_rd32 | ||
356 | xbit $r15 $r15 31 | ||
357 | bra e #init_gpc_wait | ||
358 | add b32 $r14 $r4 0x804 | ||
359 | call #nv_rd32 | ||
360 | add b32 $r1 $r15 | ||
361 | |||
362 | // next! | ||
363 | add b32 $r4 0x8000 | ||
364 | sub b32 $r3 1 | ||
365 | bra ne #init_gpc | ||
366 | |||
367 | // save context size, and tell host we're ready | ||
368 | mov $r2 0x800 | ||
369 | shl b32 $r2 6 | ||
370 | iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size | ||
371 | add b32 $r2 0x800 | ||
372 | clear b32 $r1 | ||
373 | bset $r1 31 | ||
374 | iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 | ||
375 | |||
376 | // Main program loop, very simple, sleeps until woken up by the interrupt | ||
377 | // handler, pulls a command from the queue and executes its handler | ||
378 | // | ||
379 | main: | ||
380 | // sleep until we have something to do | ||
381 | bset $flags $p0 | ||
382 | sleep $p0 | ||
383 | mov $r13 #cmd_queue | ||
384 | call #queue_get | ||
385 | bra $p1 #main | ||
386 | |||
387 | // context switch, requested by GPU? | ||
388 | cmpu b32 $r14 0x4001 | ||
389 | bra ne #main_not_ctx_switch | ||
390 | trace_set(T_AUTO) | ||
391 | mov $r1 0xb00 | ||
392 | shl b32 $r1 6 | ||
393 | iord $r2 I[$r1 + 0x100] // CHAN_NEXT | ||
394 | iord $r1 I[$r1 + 0x000] // CHAN_CUR | ||
395 | |||
396 | xbit $r3 $r1 31 | ||
397 | bra e #chsw_no_prev | ||
398 | xbit $r3 $r2 31 | ||
399 | bra e #chsw_prev_no_next | ||
400 | push $r2 | ||
401 | mov b32 $r2 $r1 | ||
402 | trace_set(T_SAVE) | ||
403 | bclr $flags $p1 | ||
404 | bset $flags $p2 | ||
405 | call #ctx_xfer | ||
406 | trace_clr(T_SAVE); | ||
407 | pop $r2 | ||
408 | trace_set(T_LOAD); | ||
409 | bset $flags $p1 | ||
410 | call #ctx_xfer | ||
411 | trace_clr(T_LOAD); | ||
412 | bra #chsw_done | ||
413 | chsw_prev_no_next: | ||
414 | push $r2 | ||
415 | mov b32 $r2 $r1 | ||
416 | bclr $flags $p1 | ||
417 | bclr $flags $p2 | ||
418 | call #ctx_xfer | ||
419 | pop $r2 | ||
420 | mov $r1 0xb00 | ||
421 | shl b32 $r1 6 | ||
422 | iowr I[$r1] $r2 | ||
423 | bra #chsw_done | ||
424 | chsw_no_prev: | ||
425 | xbit $r3 $r2 31 | ||
426 | bra e #chsw_done | ||
427 | bset $flags $p1 | ||
428 | bclr $flags $p2 | ||
429 | call #ctx_xfer | ||
430 | |||
431 | // ack the context switch request | ||
432 | chsw_done: | ||
433 | mov $r1 0xb0c | ||
434 | shl b32 $r1 6 | ||
435 | mov $r2 1 | ||
436 | iowr I[$r1 + 0x000] $r2 // 0x409b0c | ||
437 | trace_clr(T_AUTO) | ||
438 | bra #main | ||
439 | |||
440 | // request to set current channel? (*not* a context switch) | ||
441 | main_not_ctx_switch: | ||
442 | cmpu b32 $r14 0x0001 | ||
443 | bra ne #main_not_ctx_chan | ||
444 | mov b32 $r2 $r15 | ||
445 | call #ctx_chan | ||
446 | bra #main_done | ||
447 | |||
448 | // request to store current channel context? | ||
449 | main_not_ctx_chan: | ||
450 | cmpu b32 $r14 0x0002 | ||
451 | bra ne #main_not_ctx_save | ||
452 | trace_set(T_SAVE) | ||
453 | bclr $flags $p1 | ||
454 | bclr $flags $p2 | ||
455 | call #ctx_xfer | ||
456 | trace_clr(T_SAVE) | ||
457 | bra #main_done | ||
458 | |||
459 | main_not_ctx_save: | ||
460 | shl b32 $r15 $r14 16 | ||
461 | or $r15 E_BAD_COMMAND | ||
462 | call #error | ||
463 | bra #main | ||
464 | |||
465 | main_done: | ||
466 | mov $r1 0x820 | ||
467 | shl b32 $r1 6 | ||
468 | clear b32 $r2 | ||
469 | bset $r2 31 | ||
470 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | ||
471 | bra #main | ||
472 | |||
473 | // interrupt handler | ||
474 | ih: | ||
475 | push $r8 | ||
476 | mov $r8 $flags | ||
477 | push $r8 | ||
478 | push $r9 | ||
479 | push $r10 | ||
480 | push $r11 | ||
481 | push $r13 | ||
482 | push $r14 | ||
483 | push $r15 | ||
484 | |||
485 | // incoming fifo command? | ||
486 | iord $r10 I[$r0 + 0x200] // INTR | ||
487 | and $r11 $r10 0x00000004 | ||
488 | bra e #ih_no_fifo | ||
489 | // queue incoming fifo command for later processing | ||
490 | mov $r11 0x1900 | ||
491 | mov $r13 #cmd_queue | ||
492 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | ||
493 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | ||
494 | call #queue_put | ||
495 | add b32 $r11 0x400 | ||
496 | mov $r14 1 | ||
497 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | ||
498 | |||
499 | // context switch request? | ||
500 | ih_no_fifo: | ||
501 | and $r11 $r10 0x00000100 | ||
502 | bra e #ih_no_ctxsw | ||
503 | // enqueue a context switch for later processing | ||
504 | mov $r13 #cmd_queue | ||
505 | mov $r14 0x4001 | ||
506 | call #queue_put | ||
507 | |||
508 | // anything we didn't handle, bring it to the host's attention | ||
509 | ih_no_ctxsw: | ||
510 | mov $r11 0x104 | ||
511 | not b32 $r11 | ||
512 | and $r11 $r10 $r11 | ||
513 | bra e #ih_no_other | ||
514 | mov $r10 0xc1c | ||
515 | shl b32 $r10 6 | ||
516 | iowr I[$r10] $r11 // INTR_UP_SET | ||
517 | |||
518 | // ack, and wake up main() | ||
519 | ih_no_other: | ||
520 | iowr I[$r0 + 0x100] $r10 // INTR_ACK | ||
521 | |||
522 | pop $r15 | ||
523 | pop $r14 | ||
524 | pop $r13 | ||
525 | pop $r11 | ||
526 | pop $r10 | ||
527 | pop $r9 | ||
528 | pop $r8 | ||
529 | mov $flags $r8 | ||
530 | pop $r8 | ||
531 | bclr $flags $p0 | ||
532 | iret | ||
533 | |||
534 | // Again, not real sure | ||
535 | // | ||
536 | // In: $r15 value to set 0x404170 to | ||
537 | // | ||
538 | ctx_4170s: | ||
539 | mov $r14 0x4170 | ||
540 | sethi $r14 0x400000 | ||
541 | or $r15 0x10 | ||
542 | call #nv_wr32 | ||
543 | ret | ||
544 | |||
545 | // Waits for a ctx_4170s() call to complete | ||
546 | // | ||
547 | ctx_4170w: | ||
548 | mov $r14 0x4170 | ||
549 | sethi $r14 0x400000 | ||
550 | call #nv_rd32 | ||
551 | and $r15 0x10 | ||
552 | bra ne #ctx_4170w | ||
553 | ret | ||
554 | |||
555 | // Disables various things, waits a bit, and re-enables them.. | ||
556 | // | ||
557 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | ||
558 | // good description for the bits we turn off? Anyways, without this, | ||
559 | // funny things happen. | ||
560 | // | ||
561 | ctx_redswitch: | ||
562 | mov $r14 0x614 | ||
563 | shl b32 $r14 6 | ||
564 | mov $r15 0x270 | ||
565 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL | ||
566 | mov $r15 8 | ||
567 | ctx_redswitch_delay: | ||
568 | sub b32 $r15 1 | ||
569 | bra ne #ctx_redswitch_delay | ||
570 | mov $r15 0x770 | ||
571 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL | ||
572 | ret | ||
573 | |||
574 | // Not a clue what this is for, except that unless the value is 0x10, the | ||
575 | // strand context is saved (and presumably restored) incorrectly.. | ||
576 | // | ||
577 | // In: $r15 value to set to (0x00/0x10 are used) | ||
578 | // | ||
579 | ctx_86c: | ||
580 | mov $r14 0x86c | ||
581 | shl b32 $r14 6 | ||
582 | iowr I[$r14] $r15 // HUB(0x86c) = val | ||
583 | mov $r14 -0x75ec | ||
584 | sethi $r14 0x400000 | ||
585 | call #nv_wr32 // ROP(0xa14) = val | ||
586 | mov $r14 -0x5794 | ||
587 | sethi $r14 0x410000 | ||
588 | call #nv_wr32 // GPC(0x86c) = val | ||
589 | ret | ||
590 | |||
591 | // ctx_load - loads a channel's ctxctl data, and selects its vm | ||
592 | // | ||
593 | // In: $r2 channel address | ||
594 | // | ||
595 | ctx_load: | ||
596 | trace_set(T_CHAN) | ||
597 | |||
598 | // switch to channel, somewhat magic in parts.. | ||
599 | mov $r10 12 // DONE_UNK12 | ||
600 | call #wait_donez | ||
601 | mov $r1 0xa24 | ||
602 | shl b32 $r1 6 | ||
603 | iowr I[$r1 + 0x000] $r0 // 0x409a24 | ||
604 | mov $r3 0xb00 | ||
605 | shl b32 $r3 6 | ||
606 | iowr I[$r3 + 0x100] $r2 // CHAN_NEXT | ||
607 | mov $r1 0xa0c | ||
608 | shl b32 $r1 6 | ||
609 | mov $r4 7 | ||
610 | iowr I[$r1 + 0x000] $r2 // MEM_CHAN | ||
611 | iowr I[$r1 + 0x100] $r4 // MEM_CMD | ||
612 | ctx_chan_wait_0: | ||
613 | iord $r4 I[$r1 + 0x100] | ||
614 | and $r4 0x1f | ||
615 | bra ne #ctx_chan_wait_0 | ||
616 | iowr I[$r3 + 0x000] $r2 // CHAN_CUR | ||
617 | |||
618 | // load channel header, fetch PGRAPH context pointer | ||
619 | mov $xtargets $r0 | ||
620 | bclr $r2 31 | ||
621 | shl b32 $r2 4 | ||
622 | add b32 $r2 2 | ||
623 | |||
624 | trace_set(T_LCHAN) | ||
625 | mov $r1 0xa04 | ||
626 | shl b32 $r1 6 | ||
627 | iowr I[$r1 + 0x000] $r2 // MEM_BASE | ||
628 | mov $r1 0xa20 | ||
629 | shl b32 $r1 6 | ||
630 | mov $r2 0x0002 | ||
631 | sethi $r2 0x80000000 | ||
632 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram | ||
633 | mov $r1 0x10 // chan + 0x0210 | ||
634 | mov $r2 #xfer_data | ||
635 | sethi $r2 0x00020000 // 16 bytes | ||
636 | xdld $r1 $r2 | ||
637 | xdwait | ||
638 | trace_clr(T_LCHAN) | ||
639 | |||
640 | // update current context | ||
641 | ld b32 $r1 D[$r0 + #xfer_data + 4] | ||
642 | shl b32 $r1 24 | ||
643 | ld b32 $r2 D[$r0 + #xfer_data + 0] | ||
644 | shr b32 $r2 8 | ||
645 | or $r1 $r2 | ||
646 | st b32 D[$r0 + #ctx_current] $r1 | ||
647 | |||
648 | // set transfer base to start of context, and fetch context header | ||
649 | trace_set(T_LCTXH) | ||
650 | mov $r2 0xa04 | ||
651 | shl b32 $r2 6 | ||
652 | iowr I[$r2 + 0x000] $r1 // MEM_BASE | ||
653 | mov $r2 1 | ||
654 | mov $r1 0xa20 | ||
655 | shl b32 $r1 6 | ||
656 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm | ||
657 | mov $r1 #chan_data | ||
658 | sethi $r1 0x00060000 // 256 bytes | ||
659 | xdld $r0 $r1 | ||
660 | xdwait | ||
661 | trace_clr(T_LCTXH) | ||
662 | |||
663 | trace_clr(T_CHAN) | ||
664 | ret | ||
665 | |||
666 | // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as | ||
667 | // the active channel for ctxctl, but not actually transfer | ||
668 | // any context data. intended for use only during initial | ||
669 | // context construction. | ||
670 | // | ||
671 | // In: $r2 channel address | ||
672 | // | ||
673 | ctx_chan: | ||
674 | call #ctx_load | ||
675 | mov $r10 12 // DONE_UNK12 | ||
676 | call #wait_donez | ||
677 | mov $r1 0xa10 | ||
678 | shl b32 $r1 6 | ||
679 | mov $r2 5 | ||
680 | iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) | ||
681 | ctx_chan_wait: | ||
682 | iord $r2 I[$r1 + 0x000] | ||
683 | or $r2 $r2 | ||
684 | bra ne #ctx_chan_wait | ||
685 | ret | ||
686 | |||
687 | // Execute per-context state overrides list | ||
688 | // | ||
689 | // Only executed on the first load of a channel. Might want to look into | ||
690 | // removing this and having the host directly modify the channel's context | ||
691 | // to change this state... The nouveau DRM already builds this list as | ||
692 | // it's definitely needed for NVIDIA's, so we may as well use it for now | ||
693 | // | ||
694 | // Input: $r1 mmio list length | ||
695 | // | ||
696 | ctx_mmio_exec: | ||
697 | // set transfer base to be the mmio list | ||
698 | ld b32 $r3 D[$r0 + #chan_mmio_address] | ||
699 | mov $r2 0xa04 | ||
700 | shl b32 $r2 6 | ||
701 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
702 | |||
703 | clear b32 $r3 | ||
704 | ctx_mmio_loop: | ||
705 | // fetch next 256 bytes of mmio list if necessary | ||
706 | and $r4 $r3 0xff | ||
707 | bra ne #ctx_mmio_pull | ||
708 | mov $r5 #xfer_data | ||
709 | sethi $r5 0x00060000 // 256 bytes | ||
710 | xdld $r3 $r5 | ||
711 | xdwait | ||
712 | |||
713 | // execute a single list entry | ||
714 | ctx_mmio_pull: | ||
715 | ld b32 $r14 D[$r4 + #xfer_data + 0x00] | ||
716 | ld b32 $r15 D[$r4 + #xfer_data + 0x04] | ||
717 | call #nv_wr32 | ||
718 | |||
719 | // next! | ||
720 | add b32 $r3 8 | ||
721 | sub b32 $r1 1 | ||
722 | bra ne #ctx_mmio_loop | ||
723 | |||
724 | // set transfer base back to the current context | ||
725 | ctx_mmio_done: | ||
726 | ld b32 $r3 D[$r0 + #ctx_current] | ||
727 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | ||
728 | |||
729 | // disable the mmio list now, we don't need/want to execute it again | ||
730 | st b32 D[$r0 + #chan_mmio_count] $r0 | ||
731 | mov $r1 #chan_data | ||
732 | sethi $r1 0x00060000 // 256 bytes | ||
733 | xdst $r0 $r1 | ||
734 | xdwait | ||
735 | ret | ||
736 | |||
737 | // Transfer HUB context data between GPU and storage area | ||
738 | // | ||
739 | // In: $r2 channel address | ||
740 | // $p1 clear on save, set on load | ||
741 | // $p2 set if opposite direction done/will be done, so: | ||
742 | // on save it means: "a load will follow this save" | ||
743 | // on load it means: "a save preceded this load" | ||
744 | // | ||
745 | ctx_xfer: | ||
746 | // according to mwk, some kind of wait for idle | ||
747 | mov $r15 0xc00 | ||
748 | shl b32 $r15 6 | ||
749 | mov $r14 4 | ||
750 | iowr I[$r15 + 0x200] $r14 | ||
751 | ctx_xfer_idle: | ||
752 | iord $r14 I[$r15 + 0x000] | ||
753 | and $r14 0x2000 | ||
754 | bra ne #ctx_xfer_idle | ||
755 | |||
756 | bra not $p1 #ctx_xfer_pre | ||
757 | bra $p2 #ctx_xfer_pre_load | ||
758 | ctx_xfer_pre: | ||
759 | mov $r15 0x10 | ||
760 | call #ctx_86c | ||
761 | bra not $p1 #ctx_xfer_exec | ||
762 | |||
763 | ctx_xfer_pre_load: | ||
764 | mov $r15 2 | ||
765 | call #ctx_4170s | ||
766 | call #ctx_4170w | ||
767 | call #ctx_redswitch | ||
768 | clear b32 $r15 | ||
769 | call #ctx_4170s | ||
770 | call #ctx_load | ||
771 | |||
772 | // fetch context pointer, and initiate xfer on all GPCs | ||
773 | ctx_xfer_exec: | ||
774 | ld b32 $r1 D[$r0 + #ctx_current] | ||
775 | mov $r2 0x414 | ||
776 | shl b32 $r2 6 | ||
777 | iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset | ||
778 | mov $r14 -0x5b00 | ||
779 | sethi $r14 0x410000 | ||
780 | mov b32 $r15 $r1 | ||
781 | call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer | ||
782 | add b32 $r14 4 | ||
783 | xbit $r15 $flags $p1 | ||
784 | xbit $r2 $flags $p2 | ||
785 | shl b32 $r2 1 | ||
786 | or $r15 $r2 | ||
787 | call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) | ||
788 | |||
789 | // strands | ||
790 | mov $r1 0x4afc | ||
791 | sethi $r1 0x20000 | ||
792 | mov $r2 0xc | ||
793 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | ||
794 | call #strand_wait | ||
795 | mov $r2 0x47fc | ||
796 | sethi $r2 0x20000 | ||
797 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | ||
798 | xbit $r2 $flags $p1 | ||
799 | add b32 $r2 3 | ||
800 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | ||
801 | |||
802 | // mmio context | ||
803 | xbit $r10 $flags $p1 // direction | ||
804 | or $r10 6 // first, last | ||
805 | mov $r11 0 // base = 0 | ||
806 | ld b32 $r12 D[$r0 + #hub_mmio_list_head] | ||
807 | ld b32 $r13 D[$r0 + #hub_mmio_list_tail] | ||
808 | mov $r14 0 // not multi | ||
809 | call #mmctx_xfer | ||
810 | |||
811 | // wait for GPCs to all complete | ||
812 | mov $r10 8 // DONE_BAR | ||
813 | call #wait_doneo | ||
814 | |||
815 | // wait for strand xfer to complete | ||
816 | call #strand_wait | ||
817 | |||
818 | // post-op | ||
819 | bra $p1 #ctx_xfer_post | ||
820 | mov $r10 12 // DONE_UNK12 | ||
821 | call #wait_donez | ||
822 | mov $r1 0xa10 | ||
823 | shl b32 $r1 6 | ||
824 | mov $r2 5 | ||
825 | iowr I[$r1] $r2 // MEM_CMD | ||
826 | ctx_xfer_post_save_wait: | ||
827 | iord $r2 I[$r1] | ||
828 | or $r2 $r2 | ||
829 | bra ne #ctx_xfer_post_save_wait | ||
830 | |||
831 | bra $p2 #ctx_xfer_done | ||
832 | ctx_xfer_post: | ||
833 | mov $r15 2 | ||
834 | call #ctx_4170s | ||
835 | clear b32 $r15 | ||
836 | call #ctx_86c | ||
837 | call #strand_post | ||
838 | call #ctx_4170w | ||
839 | clear b32 $r15 | ||
840 | call #ctx_4170s | ||
841 | |||
842 | bra not $p1 #ctx_xfer_no_post_mmio | ||
843 | ld b32 $r1 D[$r0 + #chan_mmio_count] | ||
844 | or $r1 $r1 | ||
845 | bra e #ctx_xfer_no_post_mmio | ||
846 | call #ctx_mmio_exec | ||
847 | |||
848 | ctx_xfer_no_post_mmio: | ||
849 | |||
850 | ctx_xfer_done: | ||
851 | ret | ||
852 | |||
853 | .align 256 | 164 | .align 256 |
165 | #undef INCLUDE_CODE | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h index f22422e09045..623e8698ace1 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h | |||
@@ -388,7 +388,7 @@ uint32_t nve0_grhub_code[] = { | |||
388 | 0x0089d000, | 388 | 0x0089d000, |
389 | 0x081887f1, | 389 | 0x081887f1, |
390 | 0xd00684b6, | 390 | 0xd00684b6, |
391 | /* 0x00e2: wait_done_wait_donez */ | 391 | /* 0x00e2: wait_donez_ne */ |
392 | 0x87f1008a, | 392 | 0x87f1008a, |
393 | 0x84b60400, | 393 | 0x84b60400, |
394 | 0x0088cf06, | 394 | 0x0088cf06, |
@@ -405,7 +405,7 @@ uint32_t nve0_grhub_code[] = { | |||
405 | 0x87f10089, | 405 | 0x87f10089, |
406 | 0x84b60818, | 406 | 0x84b60818, |
407 | 0x008ad006, | 407 | 0x008ad006, |
408 | /* 0x011c: wait_done_wait_doneo */ | 408 | /* 0x011c: wait_doneo_e */ |
409 | 0x040087f1, | 409 | 0x040087f1, |
410 | 0xcf0684b6, | 410 | 0xcf0684b6, |
411 | 0x8aff0088, | 411 | 0x8aff0088, |
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/macros.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/macros.fuc new file mode 100644 index 000000000000..43a0b9476efd --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/macros.fuc | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright 2013 Red Hat Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Authors: Ben Skeggs <bskeggs@redhat.com> | ||
23 | */ | ||
24 | |||
25 | #include "os.h" | ||
26 | |||
27 | #define mmctx_data(r,c) .b32 (((c - 1) << 26) | r) | ||
28 | #define queue_init .skip 72 // (2 * 4) + ((8 * 4) * 2) | ||
29 | |||
30 | #define T_WAIT 0 | ||
31 | #define T_MMCTX 1 | ||
32 | #define T_STRWAIT 2 | ||
33 | #define T_STRINIT 3 | ||
34 | #define T_AUTO 4 | ||
35 | #define T_CHAN 5 | ||
36 | #define T_LOAD 6 | ||
37 | #define T_SAVE 7 | ||
38 | #define T_LCHAN 8 | ||
39 | #define T_LCTXH 9 | ||
40 | |||
41 | #define trace_set(bit) /* | ||
42 | */ mov $r8 0x83c /* | ||
43 | */ shl b32 $r8 6 /* | ||
44 | */ clear b32 $r9 /* | ||
45 | */ bset $r9 bit /* | ||
46 | */ iowr I[$r8 + 0x000] $r9 | ||
47 | |||
48 | #define trace_clr(bit) /* | ||
49 | */ mov $r8 0x85c /* | ||
50 | */ shl b32 $r8 6 /* | ||
51 | */ clear b32 $r9 /* | ||
52 | */ bset $r9 bit /* | ||
53 | */ iowr I[$r8 + 0x000] $r9 | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc deleted file mode 100644 index f16a5d53319d..000000000000 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/nve0.fuc +++ /dev/null | |||
@@ -1,400 +0,0 @@ | |||
1 | /* fuc microcode util functions for nve0 PGRAPH | ||
2 | * | ||
3 | * Copyright 2011 Red Hat Inc. | ||
4 | * | ||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
6 | * copy of this software and associated documentation files (the "Software"), | ||
7 | * to deal in the Software without restriction, including without limitation | ||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
9 | * and/or sell copies of the Software, and to permit persons to whom the | ||
10 | * Software is furnished to do so, subject to the following conditions: | ||
11 | * | ||
12 | * The above copyright notice and this permission notice shall be included in | ||
13 | * all copies or substantial portions of the Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
21 | * OTHER DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: Ben Skeggs | ||
24 | */ | ||
25 | |||
26 | define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)') | ||
27 | define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))') | ||
28 | |||
29 | ifdef(`include_code', ` | ||
30 | // Error codes (queue_put loads the overflow code into $r15 before calling the error routine) | ||
31 | define(`E_BAD_COMMAND', 0x01) | ||
32 | define(`E_CMD_OVERFLOW', 0x02) | ||
33 | |||
34 | // Util macros to help with debugging ucode hangs etc. The constants are bit numbers for the two trace macros below, which both clobber $r8 and $r9 | ||
35 | define(`T_WAIT', 0) | ||
36 | define(`T_MMCTX', 1) | ||
37 | define(`T_STRWAIT', 2) | ||
38 | define(`T_STRINIT', 3) | ||
39 | define(`T_AUTO', 4) | ||
40 | define(`T_CHAN', 5) | ||
41 | define(`T_LOAD', 6) | ||
42 | define(`T_SAVE', 7) | ||
43 | define(`T_LCHAN', 8) | ||
44 | define(`T_LCTXH', 9) | ||
45 | |||
46 | define(`trace_set', ` | ||
47 | mov $r8 0x83c | ||
48 | shl b32 $r8 6 | ||
49 | clear b32 $r9 | ||
50 | bset $r9 $1 | ||
51 | iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7] - the value written has only the requested bit set; looks like a set-bits alias, TODO confirm | ||
52 | ') | ||
53 | |||
54 | define(`trace_clr', ` | ||
55 | mov $r8 0x85c | ||
56 | shl b32 $r8 6 | ||
57 | clear b32 $r9 | ||
58 | bset $r9 $1 | ||
59 | iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7] - same single-bit value at a different address; looks like a clear-bits alias, TODO confirm | ||
60 | ') | ||
61 | |||
62 | // queue_put - add request to queue | ||
63 | // If the queue is full the overflow error is raised through the error routine and nothing is stored | ||
64 | // In : $r13 queue pointer | ||
65 | // $r14 command | ||
66 | // $r15 data | ||
67 | // Clobbers $r8 and $r9; on overflow $r15 is replaced by the error code | ||
68 | queue_put: | ||
69 | // make sure we have space.. (the queue is full when GET ^ 8 == PUT) | ||
70 | ld b32 $r8 D[$r13 + 0x0] // GET | ||
71 | ld b32 $r9 D[$r13 + 0x4] // PUT | ||
72 | xor $r8 8 | ||
73 | cmpu b32 $r8 $r9 | ||
74 | bra ne #queue_put_next | ||
75 | mov $r15 E_CMD_OVERFLOW | ||
76 | call #error | ||
77 | ret | ||
78 | |||
79 | // store cmd/data on queue: slot number is PUT & 7, slots are 8 bytes each and start at offset 8 | ||
80 | queue_put_next: | ||
81 | and $r8 $r9 7 | ||
82 | shl b32 $r8 3 | ||
83 | add b32 $r8 $r13 | ||
84 | add b32 $r8 8 | ||
85 | st b32 D[$r8 + 0x0] $r14 | ||
86 | st b32 D[$r8 + 0x4] $r15 | ||
87 | |||
88 | // update PUT, wrapping at 16 (one more bit than the slot number, which is what the full check above relies on) | ||
89 | add b32 $r9 1 | ||
90 | and $r9 0xf | ||
91 | st b32 D[$r13 + 0x4] $r9 | ||
92 | ret | ||
93 | |||
94 | // queue_get - fetch request from queue | ||
95 | // The queue is treated as empty when GET == PUT | ||
96 | // In : $r13 queue pointer | ||
97 | // | ||
98 | // Out: $p1 clear on success (data available) | ||
99 | // $r14 command | ||
100 | // $r15 data | ||
101 | // Clobbers $r8 and $r9; $p1 is left set and $r14/$r15 are untouched when the queue is empty | ||
102 | queue_get: | ||
103 | bset $flags $p1 | ||
104 | ld b32 $r8 D[$r13 + 0x0] // GET | ||
105 | ld b32 $r9 D[$r13 + 0x4] // PUT | ||
106 | cmpu b32 $r8 $r9 | ||
107 | bra e #queue_get_done | ||
108 | // fetch first cmd/data pair from slot GET & 7 (8 bytes per slot, slots start at offset 8) | ||
109 | and $r9 $r8 7 | ||
110 | shl b32 $r9 3 | ||
111 | add b32 $r9 $r13 | ||
112 | add b32 $r9 8 | ||
113 | ld b32 $r14 D[$r9 + 0x0] | ||
114 | ld b32 $r15 D[$r9 + 0x4] | ||
115 | |||
116 | // update GET, wrapping at 16 | ||
117 | add b32 $r8 1 | ||
118 | and $r8 0xf | ||
119 | st b32 D[$r13 + 0x0] $r8 | ||
120 | bclr $flags $p1 | ||
121 | queue_get_done: | ||
122 | ret | ||
123 | |||
124 | // nv_rd32 - read 32-bit value from nv register | ||
125 | // Posts the request to MMIO_CTRL, polls until the PENDING bit clears, then waits for done bit 6 to be set before reading the value | ||
126 | // In : $r14 register | ||
127 | // Out: $r15 value | ||
128 | // Clobbers $r10, $r11 and $r12, plus whatever wait_doneo clobbers | ||
129 | nv_rd32: | ||
130 | mov $r11 0x728 | ||
131 | shl b32 $r11 6 | ||
132 | mov b32 $r12 $r14 | ||
133 | bset $r12 31 // MMIO_CTRL_PENDING | ||
134 | iowr I[$r11 + 0x000] $r12 // MMIO_CTRL | ||
135 | nv_rd32_wait: | ||
136 | iord $r12 I[$r11 + 0x000] | ||
137 | xbit $r12 $r12 31 | ||
138 | bra ne #nv_rd32_wait | ||
139 | mov $r10 6 // DONE_MMIO_RD | ||
140 | call #wait_doneo | ||
141 | iord $r15 I[$r11 + 0x100] // MMIO_RDVAL | ||
142 | ret | ||
143 | |||
144 | // nv_wr32 - write 32-bit value to nv register | ||
145 | // Stores the value in MMIO_WRVAL first, then posts a write request to MMIO_CTRL and polls until the PENDING bit clears | ||
146 | // In : $r14 register | ||
147 | // $r15 value | ||
148 | // Clobbers $r11 and $r12 | ||
149 | nv_wr32: | ||
150 | mov $r11 0x728 | ||
151 | shl b32 $r11 6 | ||
152 | iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL | ||
153 | mov b32 $r12 $r14 | ||
154 | bset $r12 31 // MMIO_CTRL_PENDING | ||
155 | bset $r12 30 // MMIO_CTRL_WRITE | ||
156 | iowr I[$r11 + 0x000] $r12 // MMIO_CTRL | ||
157 | nv_wr32_wait: | ||
158 | iord $r12 I[$r11 + 0x000] | ||
159 | xbit $r12 $r12 31 | ||
160 | bra ne #nv_wr32_wait | ||
161 | ret | ||
162 | |||
163 | // (re)set watchdog timer | ||
164 | // Writes the timeout with bit 31 set to I[0x430 << 6] | ||
165 | // In : $r15 timeout | ||
166 | // Clobbers $r8; $r15 is returned with bit 31 set | ||
167 | watchdog_reset: | ||
168 | mov $r8 0x430 | ||
169 | shl b32 $r8 6 | ||
170 | bset $r15 31 | ||
171 | iowr I[$r8 + 0x000] $r15 | ||
172 | ret | ||
173 | |||
174 | // clear watchdog timer - writes $r0 (presumably zero, TODO confirm) to the register watchdog_reset programs; clobbers $r8 | ||
175 | watchdog_clear: | ||
176 | mov $r8 0x430 | ||
177 | shl b32 $r8 6 | ||
178 | iowr I[$r8 + 0x000] $r0 | ||
179 | ret | ||
180 | |||
181 | // wait_done{z,o} - wait on FUC_DONE bit to become clear/set | ||
182 | // Busy-polls the DONE register; the wait is flagged in the trace bits and the bit number is stored in CC_SCRATCH[6] | ||
183 | // In : $r10 bit to wait on | ||
184 | // Clobbers $r8 and $r9; expanded twice below to create the wait_donez and wait_doneo routines | ||
185 | define(`wait_done', ` | ||
186 | $1: | ||
187 | trace_set(T_WAIT); | ||
188 | mov $r8 0x818 | ||
189 | shl b32 $r8 6 | ||
190 | iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit | ||
191 | wait_done_$1: | ||
192 | mov $r8 0x400 | ||
193 | shl b32 $r8 6 | ||
194 | iord $r8 I[$r8 + 0x000] // DONE | ||
195 | xbit $r8 $r8 $r10 | ||
196 | bra $2 #wait_done_$1 | ||
197 | trace_clr(T_WAIT) | ||
198 | ret | ||
199 | ') | ||
200 | wait_done(wait_donez, ne) | ||
201 | wait_done(wait_doneo, e) | ||
202 | |||
203 | // mmctx_size - determine size of a mmio list transfer | ||
204 | // Each 32-bit list entry carries (count - 1) in bits 31:26; the result is the sum of count * 4 over all entries | ||
205 | // In : $r14 mmio list head | ||
206 | // $r15 mmio list tail | ||
207 | // Out: $r15 transfer size (in bytes) | ||
208 | // Clobbers $r8, $r9 and $r14; head must differ from tail because the loop only tests after processing an entry | ||
209 | mmctx_size: | ||
210 | clear b32 $r9 | ||
211 | nv_mmctx_size_loop: | ||
212 | ld b32 $r8 D[$r14] | ||
213 | shr b32 $r8 26 | ||
214 | add b32 $r8 1 | ||
215 | shl b32 $r8 2 | ||
216 | add b32 $r9 $r8 | ||
217 | add b32 $r14 4 | ||
218 | cmpu b32 $r14 $r15 | ||
219 | bra ne #nv_mmctx_size_loop | ||
220 | mov b32 $r15 $r9 | ||
221 | ret | ||
222 | |||
223 | // mmctx_xfer - execute a list of mmio transfers | ||
224 | // MMCTX_BASE is programmed only when $r11 is non-zero, and the MULTI registers only when $r14 is non-zero | ||
225 | // In : $r10 flags | ||
226 | // bit 0: direction (0 = save, 1 = load) | ||
227 | // bit 1: set if first transfer | ||
228 | // bit 2: set if last transfer | ||
229 | // $r11 base | ||
230 | // $r12 mmio list head | ||
231 | // $r13 mmio list tail | ||
232 | // $r14 multi_stride | ||
233 | // $r15 multi_mask | ||
234 | // Clobbers $r8, $r9, $r11, $r12 and $r14; $r10 is also overwritten when flag bit 2 is clear | ||
235 | mmctx_xfer: | ||
236 | trace_set(T_MMCTX) | ||
237 | mov $r8 0x710 | ||
238 | shl b32 $r8 6 | ||
239 | clear b32 $r9 | ||
240 | or $r11 $r11 | ||
241 | bra e #mmctx_base_disabled | ||
242 | iowr I[$r8 + 0x000] $r11 // MMCTX_BASE | ||
243 | bset $r9 0 // BASE_EN | ||
244 | mmctx_base_disabled: | ||
245 | or $r14 $r14 | ||
246 | bra e #mmctx_multi_disabled | ||
247 | iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE | ||
248 | iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK | ||
249 | bset $r9 1 // MULTI_EN | ||
250 | mmctx_multi_disabled: | ||
251 | add b32 $r8 0x100 | ||
252 | |||
253 | xbit $r11 $r10 0 | ||
254 | shl b32 $r11 16 // DIR | ||
255 | bset $r11 12 // QLIMIT = 0x10 | ||
256 | xbit $r14 $r10 1 | ||
257 | shl b32 $r14 17 | ||
258 | or $r11 $r14 // START_TRIGGER | ||
259 | iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL | ||
260 | |||
261 | // loop over the mmio list, and send requests to the hw | ||
262 | mmctx_exec_loop: | ||
263 | // wait for space in mmctx queue (spins while the low 5 bits of MMCTX_CTRL read as zero) | ||
264 | mmctx_wait_free: | ||
265 | iord $r14 I[$r8 + 0x000] // MMCTX_CTRL | ||
266 | and $r14 0x1f | ||
267 | bra e #mmctx_wait_free | ||
268 | |||
269 | // queue up an entry, tagged with the BASE_EN/MULTI_EN bits collected in $r9 | ||
270 | ld b32 $r14 D[$r12] | ||
271 | or $r14 $r9 | ||
272 | iowr I[$r8 + 0x300] $r14 | ||
273 | add b32 $r12 4 | ||
274 | cmpu b32 $r12 $r13 | ||
275 | bra ne #mmctx_exec_loop | ||
276 | |||
277 | xbit $r11 $r10 2 | ||
278 | bra ne #mmctx_stop | ||
279 | // wait for queue to empty (low 5 bits of MMCTX_CTRL back at 0x10, the QLIMIT value) | ||
280 | mmctx_fini_wait: | ||
281 | iord $r11 I[$r8 + 0x000] // MMCTX_CTRL | ||
282 | and $r11 0x1f | ||
283 | cmpu b32 $r11 0x10 | ||
284 | bra ne #mmctx_fini_wait | ||
285 | mov $r10 2 // DONE_MMCTX | ||
286 | call #wait_donez | ||
287 | bra #mmctx_done | ||
288 | mmctx_stop: | ||
289 | xbit $r11 $r10 0 | ||
290 | shl b32 $r11 16 // DIR | ||
291 | bset $r11 12 // QLIMIT = 0x10 | ||
292 | bset $r11 18 // STOP_TRIGGER | ||
293 | iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL | ||
294 | mmctx_stop_wait: | ||
295 | // wait for STOP_TRIGGER to clear | ||
296 | iord $r11 I[$r8 + 0x000] // MMCTX_CTRL | ||
297 | xbit $r11 $r11 18 | ||
298 | bra ne #mmctx_stop_wait | ||
299 | mmctx_done: | ||
300 | trace_clr(T_MMCTX) | ||
301 | ret | ||
302 | |||
303 | // Wait for DONE_STRAND | ||
304 | // Waits for done bit 2 to clear via wait_donez, preserving $r10. NOTE(review): bit 2 is labelled DONE_MMCTX in mmctx_xfer - confirm which name is right | ||
305 | strand_wait: | ||
306 | push $r10 | ||
307 | mov $r10 2 | ||
308 | call #wait_donez | ||
309 | pop $r10 | ||
310 | ret | ||
311 | |||
312 | // unknown - call before issuing strand commands | ||
313 | // Writes 0xc to the address strand_set annotates as 0x928, then calls strand_wait; clobbers $r8 and $r9 | ||
314 | strand_pre: | ||
315 | mov $r8 0x4afc | ||
316 | sethi $r8 0x20000 | ||
317 | mov $r9 0xc | ||
318 | iowr I[$r8] $r9 | ||
319 | call #strand_wait | ||
320 | ret | ||
321 | |||
322 | // unknown - call after issuing strand commands | ||
323 | // Writes 0xd to the address strand_set annotates as 0x928, then calls strand_wait; clobbers $r8 and $r9 | ||
324 | strand_post: | ||
325 | mov $r8 0x4afc | ||
326 | sethi $r8 0x20000 | ||
327 | mov $r9 0xd | ||
328 | iowr I[$r8] $r9 | ||
329 | call #strand_wait | ||
330 | ret | ||
331 | |||
332 | // Selects strand set?! | ||
333 | // Writes 0xf to 0x93c and 0xb to 0x928, waits, then writes the id to 0x93c and 0xa to 0x928 and waits again | ||
334 | // In: $r14 id | ||
335 | // Clobbers $r10, $r11 and $r12, plus $r8 and $r9 through the wait routine | ||
336 | strand_set: | ||
337 | mov $r10 0x4ffc | ||
338 | sethi $r10 0x20000 | ||
339 | sub b32 $r11 $r10 0x500 | ||
340 | mov $r12 0xf | ||
341 | iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf | ||
342 | mov $r12 0xb | ||
343 | iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb | ||
344 | call #strand_wait | ||
345 | iowr I[$r10 + 0x000] $r14 // 0x93c = <id> | ||
346 | mov $r12 0xa | ||
347 | iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa | ||
348 | call #strand_wait | ||
349 | ret | ||
350 | |||
351 | // Initialise strand context data | ||
352 | // | ||
353 | // In : $r15 context base | ||
354 | // Out: $r15 context size (in bytes) | ||
355 | // Offsets are tracked in 256-byte units: base >> 8, advanced by (STRAND_SIZE >> 6) + 1 for every strand | ||
356 | // Strandset(?) 3 hardcoded currently | ||
357 | // Clobbers $r8-$r12 and $r14 | ||
358 | strand_ctx_init: | ||
359 | trace_set(T_STRINIT) | ||
360 | call #strand_pre | ||
361 | mov $r14 3 | ||
362 | call #strand_set | ||
363 | mov $r10 0x46fc | ||
364 | sethi $r10 0x20000 | ||
365 | add b32 $r11 $r10 0x400 | ||
366 | iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0 | ||
367 | mov $r12 1 | ||
368 | iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE | ||
369 | call #strand_wait | ||
370 | sub b32 $r12 $r0 1 | ||
371 | iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff | ||
372 | mov $r12 2 | ||
373 | iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT | ||
374 | call #strand_wait | ||
375 | call #strand_post | ||
376 | |||
377 | // read the size of each strand, poke the context offset of | ||
378 | // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry | ||
379 | // about it later then. | ||
380 | mov $r8 0x880 | ||
381 | shl b32 $r8 6 | ||
382 | iord $r9 I[$r8 + 0x000] // STRANDS | ||
383 | add b32 $r8 0x2200 | ||
384 | shr b32 $r14 $r15 8 | ||
385 | ctx_init_strand_loop: | ||
386 | iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE | ||
387 | iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE | ||
388 | iord $r10 I[$r8 + 0x200] // STRAND_SIZE | ||
389 | shr b32 $r10 6 | ||
390 | add b32 $r10 1 | ||
391 | add b32 $r14 $r10 | ||
392 | add b32 $r8 4 | ||
393 | sub b32 $r9 1 | ||
394 | bra ne #ctx_init_strand_loop | ||
395 | |||
396 | shl b32 $r14 8 | ||
397 | sub b32 $r15 $r14 $r15 | ||
398 | trace_clr(T_STRINIT) | ||
399 | ret | ||
400 | ') | ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/os.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/os.h new file mode 100644 index 000000000000..fd1d380de094 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/os.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef __NVKM_GRAPH_OS_H__ | ||
2 | #define __NVKM_GRAPH_OS_H__ | ||
3 | |||
4 | #define E_BAD_COMMAND 0x00000001 | ||
5 | #define E_CMD_OVERFLOW 0x00000002 | ||
6 | |||
7 | #endif | ||