Commit | Line | Data |
---|---|---|
1978a2f2 BS |
1 | /* fuc microcode for nve0 PGRAPH/HUB |
2 | * | |
3 | * Copyright 2011 Red Hat Inc. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the "Software"), | |
7 | * to deal in the Software without restriction, including without limitation | |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
9 | * and/or sell copies of the Software, and to permit persons to whom the | |
10 | * Software is furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice shall be included in | |
13 | * all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
21 | * OTHER DEALINGS IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: Ben Skeggs | |
24 | */ | |
25 | ||
26 | /* To build: | |
27 | * m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h | |
28 | */ | |
29 | ||
30 | .section #nve0_grhub_data | |
31 | include(`nve0.fuc') | |
// gpc_count / rop_count: written by init from the value read at 0x409604
// (low 5 bits and bits 16:20 respectively)
32 | gpc_count: .b32 0 | |
33 | rop_count: .b32 0 | |
// command queue: ih appends with queue_put, main removes with queue_get
// (queue_init is a macro that is not defined in this file)
34 | cmd_queue: queue_init | |
// bounds of the mmio list picked from the chipsets table by init; ctx_xfer
// passes them to mmctx_xfer
35 | hub_mmio_list_head: .b32 0 | |
36 | hub_mmio_list_tail: .b32 0 | |
37 | ||
// value ctx_load programs as MEM_BASE for the context; ctx_xfer and
// ctx_mmio_exec read it back
38 | ctx_current: .b32 0 | |
39 | ||
cb1e06e0 BS |
40 | .align 256 |
// ctx_load reads 256 bytes from the start of the context into chan_data and
// ctx_mmio_exec writes 256 bytes back from it; only the first two words are
// given names here
41 | chan_data: | |
42 | chan_mmio_count: .b32 0 | |
43 | chan_mmio_address: .b32 0 | |
44 | ||
45 | .align 256 | |
// staging buffer for xdld: ctx_load pulls 16 bytes and ctx_mmio_exec 256 bytes
// into it although a single word is declared, so the padding up to the next
// .align 256 is relied upon as storage
46 | xfer_data: .b32 0 | |
47 | ||
// chipset lookup table scanned by init in 8-byte steps: a 4-byte chipset id
// (loaded as one b32 and compared with CC_SCRATCH[0]) followed by the b16
// head and b16 tail of that chipset's mmio list; a zero id ends the table
48 | .align 256 | |
1978a2f2 BS |
49 | chipsets: |
50 | .b8 0xe4 0 0 0 | |
51 | .b16 #nve4_hub_mmio_head | |
52 | .b16 #nve4_hub_mmio_tail | |
53 | .b8 0xe7 0 0 0 | |
54 | .b16 #nve4_hub_mmio_head | |
55 | .b16 #nve4_hub_mmio_tail | |
eca15296 BS |
56 | .b8 0xe6 0 0 0 |
57 | .b16 #nve4_hub_mmio_head | |
58 | .b16 #nve4_hub_mmio_tail | |
cb1e06e0 BS |
59 | .b8 0xf0 0 0 0 |
60 | .b16 #nvf0_hub_mmio_head | |
61 | .b16 #nvf0_hub_mmio_tail | |
1978a2f2 BS |
62 | .b8 0 0 0 0 |
63 | ||
// mmio list referenced by the 0xe4, 0xe7 and 0xe6 entries of the chipsets table.
// mmctx_data is a macro that is not defined in this file; its arguments look
// like (first register address, number of registers) - TODO confirm
64 | nve4_hub_mmio_head: | |
65 | mmctx_data(0x17e91c, 2) | |
66 | mmctx_data(0x400204, 2) | |
67 | mmctx_data(0x404010, 7) | |
68 | mmctx_data(0x4040a8, 9) | |
69 | mmctx_data(0x4040d0, 7) | |
70 | mmctx_data(0x4040f8, 1) | |
71 | mmctx_data(0x404130, 3) | |
72 | mmctx_data(0x404150, 3) | |
73 | mmctx_data(0x404164, 1) | |
74 | mmctx_data(0x4041a0, 4) | |
75 | mmctx_data(0x404200, 4) | |
76 | mmctx_data(0x404404, 14) | |
77 | mmctx_data(0x404460, 4) | |
78 | mmctx_data(0x404480, 1) | |
79 | mmctx_data(0x404498, 1) | |
80 | mmctx_data(0x404604, 4) | |
81 | mmctx_data(0x404618, 4) | |
82 | mmctx_data(0x40462c, 2) | |
83 | mmctx_data(0x404640, 1) | |
84 | mmctx_data(0x404654, 1) | |
85 | mmctx_data(0x404660, 1) | |
86 | mmctx_data(0x404678, 19) | |
87 | mmctx_data(0x4046c8, 3) | |
88 | mmctx_data(0x404700, 3) | |
89 | mmctx_data(0x404718, 10) | |
90 | mmctx_data(0x404744, 2) | |
91 | mmctx_data(0x404754, 1) | |
92 | mmctx_data(0x405800, 1) | |
93 | mmctx_data(0x405830, 3) | |
94 | mmctx_data(0x405854, 1) | |
95 | mmctx_data(0x405870, 4) | |
96 | mmctx_data(0x405a00, 2) | |
97 | mmctx_data(0x405a18, 1) | |
98 | mmctx_data(0x405b00, 1) | |
99 | mmctx_data(0x405b10, 1) | |
100 | mmctx_data(0x406020, 1) | |
101 | mmctx_data(0x406028, 4) | |
102 | mmctx_data(0x4064a8, 2) | |
103 | mmctx_data(0x4064b4, 2) | |
104 | mmctx_data(0x4064c0, 12) | |
105 | mmctx_data(0x4064fc, 1) | |
106 | mmctx_data(0x407040, 1) | |
107 | mmctx_data(0x407804, 1) | |
108 | mmctx_data(0x40780c, 6) | |
109 | mmctx_data(0x4078bc, 1) | |
110 | mmctx_data(0x408000, 7) | |
111 | mmctx_data(0x408064, 1) | |
112 | mmctx_data(0x408800, 3) | |
113 | mmctx_data(0x408840, 1) | |
114 | mmctx_data(0x408900, 3) | |
115 | mmctx_data(0x408980, 1) | |
116 | nve4_hub_mmio_tail: | |
117 | ||
// mmio list referenced by the 0xf0 entry of the chipsets table.
// mmctx_data is a macro that is not defined in this file; its arguments look
// like (first register address, number of registers) - TODO confirm
cb1e06e0 BS |
118 | nvf0_hub_mmio_head: |
119 | mmctx_data(0x17e91c, 2) | |
120 | mmctx_data(0x400204, 2) | |
121 | mmctx_data(0x404004, 17) | |
122 | mmctx_data(0x4040a8, 9) | |
123 | mmctx_data(0x4040d0, 7) | |
124 | mmctx_data(0x4040f8, 1) | |
125 | mmctx_data(0x404100, 10) | |
126 | mmctx_data(0x404130, 3) | |
127 | mmctx_data(0x404150, 3) | |
128 | mmctx_data(0x404164, 1) | |
129 | mmctx_data(0x40417c, 2) | |
130 | mmctx_data(0x4041a0, 4) | |
131 | mmctx_data(0x404200, 4) | |
132 | mmctx_data(0x404404, 12) | |
133 | mmctx_data(0x404438, 1) | |
134 | mmctx_data(0x404460, 4) | |
135 | mmctx_data(0x404480, 1) | |
136 | mmctx_data(0x404498, 1) | |
137 | mmctx_data(0x404604, 4) | |
138 | mmctx_data(0x404618, 4) | |
139 | mmctx_data(0x40462c, 2) | |
140 | mmctx_data(0x404640, 1) | |
141 | mmctx_data(0x404654, 1) | |
142 | mmctx_data(0x404660, 1) | |
143 | mmctx_data(0x404678, 19) | |
144 | mmctx_data(0x4046c8, 3) | |
145 | mmctx_data(0x404700, 3) | |
146 | mmctx_data(0x404718, 10) | |
147 | mmctx_data(0x404744, 2) | |
148 | mmctx_data(0x404754, 1) | |
149 | mmctx_data(0x405800, 1) | |
150 | mmctx_data(0x405830, 3) | |
151 | mmctx_data(0x405854, 1) | |
152 | mmctx_data(0x405870, 4) | |
153 | mmctx_data(0x405a00, 2) | |
154 | mmctx_data(0x405a18, 1) | |
155 | mmctx_data(0x405b00, 1) | |
156 | mmctx_data(0x405b10, 1) | |
157 | mmctx_data(0x405b20, 1) | |
158 | mmctx_data(0x406020, 1) | |
159 | mmctx_data(0x406028, 4) | |
160 | mmctx_data(0x4064a8, 5) | |
161 | mmctx_data(0x4064c0, 12) | |
162 | mmctx_data(0x4064fc, 1) | |
163 | mmctx_data(0x407804, 1) | |
164 | mmctx_data(0x40780c, 6) | |
165 | mmctx_data(0x4078bc, 1) | |
166 | mmctx_data(0x408000, 7) | |
167 | mmctx_data(0x408064, 1) | |
168 | mmctx_data(0x408800, 3) | |
169 | mmctx_data(0x408840, 1) | |
170 | mmctx_data(0x408900, 3) | |
171 | mmctx_data(0x408980, 1) | |
172 | nvf0_hub_mmio_tail: | |
1978a2f2 BS |
173 | |
174 | .section #nve0_grhub_code | |
// first instruction of the code section: jump over the included code to init
175 | bra #init | |
// nve0.fuc is included a second time, now with include_code defined;
// presumably the define selects the code half of that file - TODO confirm
176 | define(`include_code') | |
177 | include(`nve0.fuc') | |
178 | ||
179 | // error - reports an exception to the host | |
180 | // Stores the error code in CC_SCRATCH[5], then writes 1 to INTR_UP_SET. | |
181 | // In: $r15 error code (see nve0.fuc) | |
182 | // Out: $r15 is left holding 1; $r14 is saved and restored | |
183 | error: | |
184 | push $r14 | |
185 | mov $r14 0x814 | |
186 | shl b32 $r14 6 | |
187 | iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code | |
188 | mov $r14 0xc1c | |
189 | shl b32 $r14 6 | |
190 | mov $r15 1 | |
191 | iowr I[$r14 + 0x000] $r15 // INTR_UP_SET = 1 | |
192 | pop $r14 | |
193 | ret | |
194 | ||
195 | // init - HUB fuc initialisation, executed by triggering ucode start, will | |
196 | // fall through to main loop after completion. | |
197 | // Reached through the bra at the top of the code section, not by a call. | |
198 | // Input: | |
199 | // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) | |
200 | // | |
201 | // Output: | |
202 | // CC_SCRATCH[0]: | |
203 | // 31:31: set to signal completion | |
204 | // CC_SCRATCH[1]: | |
205 | // 31:0: total PGRAPH context size | |
206 | // (CC_SCRATCH[1] is written before the completion bit is raised) | |
207 | init: | |
// $r0 is zeroed here and used as the constant zero by the code below
208 | clear b32 $r0 | |
209 | mov $sp $r0 | |
210 | mov $xdbase $r0 | |
211 | ||
212 | // enable fifo access | |
213 | mov $r1 0x1200 | |
214 | mov $r2 2 | |
215 | iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE | |
216 | ||
217 | // setup i0 handler, and route all interrupts to it | |
218 | mov $r1 #ih | |
219 | mov $iv0 $r1 | |
220 | mov $r1 0x400 | |
221 | iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH = 0 | |
222 | ||
223 | // route HUB_CHANNEL_SWITCH to fuc interrupt 8 | |
224 | mov $r3 0x404 | |
225 | shl b32 $r3 6 | |
226 | mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8 | |
227 | iowr I[$r3 + 0x000] $r2 | |
228 | ||
229 | // not sure what these are, route them because NVIDIA does, and | |
230 | // the IRQ handler will signal the host if we ever get one.. we | |
231 | // may find out if/why we need to handle these if so.. | |
232 | // | |
233 | mov $r2 0x2004 | |
234 | iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9 | |
235 | mov $r2 0x200b | |
236 | iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10 | |
237 | mov $r2 0x200c | |
238 | iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15 | |
239 | ||
240 | // enable all INTR_UP interrupts (writes ~0, built by inverting $r0) | |
241 | mov $r2 0xc24 | |
242 | shl b32 $r2 6 | |
243 | not b32 $r3 $r0 | |
244 | iowr I[$r2] $r3 | |
245 | ||
246 | // enable fifo, ctxsw, 9, 10, 15 interrupts; $r1 is still 0x400 here | |
247 | mov $r2 -0x78fc // 0x8704 (bits 2, 8, 9, 10, 15) once sethi clears the top half | |
248 | sethi $r2 0 | |
249 | iowr I[$r1 + 0x000] $r2 // INTR_EN_SET | |
250 | ||
251 | // fifo level triggered, rest edge (register at 0x300, value 4 = fifo bit) | |
252 | sub b32 $r1 0x100 | |
253 | mov $r2 4 | |
254 | iowr I[$r1] $r2 | |
255 | ||
256 | // enable interrupts | |
257 | bset $flags ie0 | |
258 | ||
259 | // fetch enabled GPC/ROP counts: ROP from bits 16:20, GPC from the low 5 bits | |
260 | mov $r14 -0x69fc // 0x409604 together with the sethi below | |
261 | sethi $r14 0x400000 | |
262 | call #nv_rd32 | |
263 | extr $r1 $r15 16:20 | |
264 | st b32 D[$r0 + #rop_count] $r1 | |
265 | and $r15 0x1f | |
266 | st b32 D[$r0 + #gpc_count] $r15 | |
267 | ||
268 | // set BAR_REQMASK to GPC mask, computed as (1 << gpc_count) - 1 | |
269 | mov $r1 1 | |
270 | shl b32 $r1 $r15 | |
271 | sub b32 $r1 1 | |
272 | mov $r2 0x40c | |
273 | shl b32 $r2 6 | |
274 | iowr I[$r2 + 0x000] $r1 | |
275 | iowr I[$r2 + 0x100] $r1 | |
276 | ||
277 | // find context data for this chipset: walk the 8-byte chipsets entries | |
278 | mov $r2 0x800 | |
279 | shl b32 $r2 6 | |
280 | iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] | |
// start one entry before the table because the loop advances first
281 | mov $r15 #chipsets - 8 | |
282 | init_find_chipset: | |
283 | add b32 $r15 8 | |
284 | ld b32 $r3 D[$r15 + 0x00] | |
285 | cmpu b32 $r3 $r2 | |
286 | bra e #init_context | |
287 | cmpu b32 $r3 0 | |
288 | bra ne #init_find_chipset | |
289 | // unknown chipset. NOTE(review): init was entered by bra with $sp zeroed, so this ret has no return address - confirm intent | |
290 | ret | |
291 | ||
292 | // context size calculation, reserve first 256 bytes for use by fuc | |
293 | init_context: | |
294 | mov $r1 256 | |
295 | ||
296 | // calculate size of mmio context data | |
// $r15 still points at the matching chipsets entry: +4 is the list head,
// +6 the list tail. mmctx_size is not defined in this file; its result is
// taken from $r15 below
297 | ld b16 $r14 D[$r15 + 4] | |
298 | ld b16 $r15 D[$r15 + 6] | |
299 | sethi $r14 0 | |
300 | st b32 D[$r0 + #hub_mmio_list_head] $r14 | |
301 | st b32 D[$r0 + #hub_mmio_list_tail] $r15 | |
302 | call #mmctx_size | |
303 | ||
304 | // set mmctx base addresses now so we don't have to do it later, | |
305 | // they don't (currently) ever change | |
306 | mov $r3 0x700 | |
307 | shl b32 $r3 6 | |
308 | shr b32 $r4 $r1 8 | |
309 | iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE | |
310 | iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE | |
311 | add b32 $r3 0x1300 | |
312 | add b32 $r1 $r15 | |
313 | shr b32 $r15 2 | |
314 | iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT (size / 4), wtf for?!? | |
315 | ||
316 | // strands, base offset needs to be aligned to 256 bytes (this always moves on to the next 256-byte boundary, even when already aligned) | |
317 | shr b32 $r1 8 | |
318 | add b32 $r1 1 | |
319 | shl b32 $r1 8 | |
320 | mov b32 $r15 $r1 | |
321 | call #strand_ctx_init | |
322 | add b32 $r1 $r15 | |
323 | ||
324 | // initialise each GPC in sequence by passing in the offset of its | |
325 | // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which | |
326 | // has previously been uploaded by the host) running. | |
327 | // | |
328 | // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31 | |
329 | // when it has completed, and return the size of its context data | |
330 | // in GPCn_CC_SCRATCH[1] | |
331 | // $r3 counts remaining GPCs, $r4 is the current GPC's register base (0x502000, stepped by 0x8000) | |
332 | ld b32 $r3 D[$r0 + #gpc_count] | |
333 | mov $r4 0x2000 | |
334 | sethi $r4 0x500000 | |
335 | init_gpc: | |
336 | // setup, and start GPC ucode running | |
337 | add b32 $r14 $r4 0x804 | |
338 | mov b32 $r15 $r1 | |
339 | call #nv_wr32 // CC_SCRATCH[1] = ctx offset | |
340 | add b32 $r14 $r4 0x800 | |
341 | mov b32 $r15 $r2 | |
342 | call #nv_wr32 // CC_SCRATCH[0] = chipset | |
343 | add b32 $r14 $r4 0x10c | |
344 | clear b32 $r15 | |
345 | call #nv_wr32 | |
346 | add b32 $r14 $r4 0x104 | |
347 | call #nv_wr32 // ENTRY (assumes nv_wr32 left $r15 at 0 - TODO confirm) | |
348 | add b32 $r14 $r4 0x100 | |
349 | mov $r15 2 // CTRL_START_TRIGGER | |
350 | call #nv_wr32 // CTRL | |
351 | ||
352 | // wait for it to complete, and adjust context size | |
353 | add b32 $r14 $r4 0x800 | |
354 | init_gpc_wait: | |
355 | call #nv_rd32 | |
356 | xbit $r15 $r15 31 | |
357 | bra e #init_gpc_wait | |
358 | add b32 $r14 $r4 0x804 | |
359 | call #nv_rd32 | |
360 | add b32 $r1 $r15 | |
361 | ||
362 | // next! | |
363 | add b32 $r4 0x8000 | |
364 | sub b32 $r3 1 | |
365 | bra ne #init_gpc | |
366 | ||
367 | // save context size, and tell host we're ready | |
368 | mov $r2 0x800 | |
369 | shl b32 $r2 6 | |
370 | iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size | |
371 | add b32 $r2 0x800 | |
372 | clear b32 $r1 | |
373 | bset $r1 31 | |
374 | iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000 | |
375 | ||
376 | // Main program loop, very simple, sleeps until woken up by the interrupt | |
377 | // handler, pulls a command from the queue and executes its handler | |
378 | // Commands: 0x4001 context switch (queued by ih), 0x0001 set channel, 0x0002 save context; anything else is reported through error | |
379 | main: | |
380 | // sleep until we have something to do ($p0 is cleared again by ih) | |
381 | bset $flags $p0 | |
382 | sleep $p0 | |
383 | mov $r13 #cmd_queue | |
384 | call #queue_get | |
// queue_get is not defined in this file; presumably $p1 set means that the
// queue was empty and $r14/$r15 carry the command and its data - TODO confirm
385 | bra $p1 #main | |
386 | ||
387 | // context switch, requested by GPU? | |
388 | cmpu b32 $r14 0x4001 | |
389 | bra ne #main_not_ctx_switch | |
390 | trace_set(T_AUTO) | |
391 | mov $r1 0xb00 | |
392 | shl b32 $r1 6 | |
393 | iord $r2 I[$r1 + 0x100] // CHAN_NEXT | |
394 | iord $r1 I[$r1 + 0x000] // CHAN_CUR | |
395 | ||
// bit 31 of CHAN_CUR / CHAN_NEXT decides whether there is a previous / next
// channel: both set = save then load, only CUR = save, only NEXT = load
396 | xbit $r3 $r1 31 | |
397 | bra e #chsw_no_prev | |
398 | xbit $r3 $r2 31 | |
399 | bra e #chsw_prev_no_next | |
400 | push $r2 | |
401 | mov b32 $r2 $r1 | |
402 | trace_set(T_SAVE) | |
403 | bclr $flags $p1 | |
404 | bset $flags $p2 | |
405 | call #ctx_xfer | |
406 | trace_clr(T_SAVE); | |
407 | pop $r2 | |
408 | trace_set(T_LOAD); | |
409 | bset $flags $p1 | |
410 | call #ctx_xfer | |
411 | trace_clr(T_LOAD); | |
412 | bra #chsw_done | |
413 | chsw_prev_no_next: | |
414 | push $r2 | |
415 | mov b32 $r2 $r1 | |
416 | bclr $flags $p1 | |
417 | bclr $flags $p2 | |
418 | call #ctx_xfer | |
419 | pop $r2 | |
// after the save, write the CHAN_NEXT value read earlier to 0xb00 (CHAN_CUR)
420 | mov $r1 0xb00 | |
421 | shl b32 $r1 6 | |
422 | iowr I[$r1] $r2 | |
423 | bra #chsw_done | |
424 | chsw_no_prev: | |
425 | xbit $r3 $r2 31 | |
426 | bra e #chsw_done | |
427 | bset $flags $p1 | |
428 | bclr $flags $p2 | |
429 | call #ctx_xfer | |
430 | ||
431 | // ack the context switch request | |
432 | chsw_done: | |
433 | mov $r1 0xb0c | |
434 | shl b32 $r1 6 | |
435 | mov $r2 1 | |
436 | iowr I[$r1 + 0x000] $r2 // 0x409b0c | |
437 | trace_clr(T_AUTO) | |
438 | bra #main | |
439 | ||
440 | // request to set current channel? (*not* a context switch) | |
441 | main_not_ctx_switch: | |
442 | cmpu b32 $r14 0x0001 | |
443 | bra ne #main_not_ctx_chan | |
444 | mov b32 $r2 $r15 | |
445 | call #ctx_chan | |
446 | bra #main_done | |
447 | ||
448 | // request to store current channel context? ($r2 is not set up here; the save path of ctx_xfer overwrites $r2 before reading it) | |
449 | main_not_ctx_chan: | |
450 | cmpu b32 $r14 0x0002 | |
451 | bra ne #main_not_ctx_save | |
452 | trace_set(T_SAVE) | |
453 | bclr $flags $p1 | |
454 | bclr $flags $p2 | |
455 | call #ctx_xfer | |
456 | trace_clr(T_SAVE) | |
457 | bra #main_done | |
458 | ||
// unknown command: report (command << 16) | E_BAD_COMMAND and go back to
// sleep without writing the completion value used by main_done
459 | main_not_ctx_save: | |
460 | shl b32 $r15 $r14 16 | |
461 | or $r15 E_BAD_COMMAND | |
462 | call #error | |
463 | bra #main | |
464 | ||
// host command finished: write 0x80000000 to the register at 0x820 << 6
465 | main_done: | |
466 | mov $r1 0x820 | |
467 | shl b32 $r1 6 | |
468 | clear b32 $r2 | |
469 | bset $r2 31 | |
470 | iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 | |
471 | bra #main | |
472 | ||
473 | // ih - interrupt handler, installed in $iv0 by init | |
// Queues incoming FIFO commands and context switch requests on cmd_queue,
// forwards every other pending interrupt to the host through INTR_UP_SET,
// acknowledges what was pending and clears $p0 so that main() stops sleeping.
// $flags and all registers touched here ($r8-$r11, $r13-$r15) are saved on
// entry and restored on exit.
474 | ih: | |
475 | push $r8 | |
476 | mov $r8 $flags | |
477 | push $r8 | |
478 | push $r9 | |
479 | push $r10 | |
480 | push $r11 | |
481 | push $r13 | |
482 | push $r14 | |
483 | push $r15 | |
484 | ||
485 | // incoming fifo command? ($r10 keeps the pending INTR bits until the ack) | |
486 | iord $r10 I[$r0 + 0x200] // INTR | |
487 | and $r11 $r10 0x00000004 | |
488 | bra e #ih_no_fifo | |
489 | // queue incoming fifo command for later processing | |
490 | mov $r11 0x1900 | |
491 | mov $r13 #cmd_queue | |
492 | iord $r14 I[$r11 + 0x100] // FIFO_CMD | |
493 | iord $r15 I[$r11 + 0x000] // FIFO_DATA | |
494 | call #queue_put | |
495 | add b32 $r11 0x400 | |
496 | mov $r14 1 | |
497 | iowr I[$r11 + 0x000] $r14 // FIFO_ACK | |
498 | ||
499 | // context switch request? | |
500 | ih_no_fifo: | |
501 | and $r11 $r10 0x00000100 | |
502 | bra e #ih_no_ctxsw | |
503 | // enqueue a context switch for later processing (command 0x4001, see main) | |
504 | mov $r13 #cmd_queue | |
505 | mov $r14 0x4001 | |
506 | call #queue_put | |
507 | ||
508 | // anything we didn't handle, bring it to the host's attention | |
509 | ih_no_ctxsw: | |
510 | mov $r11 0x104 | |
511 | not b32 $r11 | |
512 | and $r11 $r10 $r11 | |
513 | bra e #ih_no_other | |
// the register address is built in $r9 (already saved above), not in $r10:
// $r10 must still hold the pending INTR bits for the ack below
514 | mov $r9 0xc1c | |
515 | shl b32 $r9 6 | |
516 | iowr I[$r9] $r11 // INTR_UP_SET | |
517 | ||
518 | // ack, and wake up main() | |
519 | ih_no_other: | |
520 | iowr I[$r0 + 0x100] $r10 // INTR_ACK = the bits read from INTR | |
521 | ||
522 | pop $r15 | |
523 | pop $r14 | |
524 | pop $r13 | |
525 | pop $r11 | |
526 | pop $r10 | |
527 | pop $r9 | |
528 | pop $r8 | |
529 | mov $flags $r8 | |
530 | pop $r8 | |
531 | bclr $flags $p0 | |
532 | iret | |
533 | ||
1978a2f2 BS |
534 | // ctx_4170s - writes ($r15 | 0x10) to 0x404170 via nv_wr32. Again, not real sure |
535 | // | |
536 | // In: $r15 value to set 0x404170 to (bit 0x10 is ORed in; ctx_4170w polls that bit) | |
537 | // Clobbers: $r14, $r15 | |
538 | ctx_4170s: | |
539 | mov $r14 0x4170 | |
540 | sethi $r14 0x400000 | |
541 | or $r15 0x10 | |
542 | call #nv_wr32 | |
543 | ret | |
544 | ||
545 | // ctx_4170w - waits for a ctx_4170s() call to complete | |
546 | // Re-reads 0x404170 with nv_rd32 until bit 0x10 is clear; clobbers $r14, $r15 | |
547 | ctx_4170w: | |
548 | mov $r14 0x4170 | |
549 | sethi $r14 0x400000 | |
550 | call #nv_rd32 | |
551 | and $r15 0x10 | |
552 | bra ne #ctx_4170w | |
553 | ret | |
554 | ||
555 | // ctx_redswitch - disables various things, waits a bit, and re-enables them.. | |
556 | // Writes 0x270 to HUB_RED_SWITCH, spins 8 loop iterations, then writes 0x770. | |
557 | // Not sure how exactly this helps, perhaps "ENABLE" is not such a | |
558 | // good description for the bits we turn off? Anyways, without this, | |
559 | // funny things happen. | |
560 | // Clobbers: $r14, $r15 | |
561 | ctx_redswitch: | |
562 | mov $r14 0x614 | |
563 | shl b32 $r14 6 | |
564 | mov $r15 0x270 | |
565 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL | |
566 | mov $r15 8 | |
567 | ctx_redswitch_delay: | |
568 | sub b32 $r15 1 | |
569 | bra ne #ctx_redswitch_delay | |
570 | mov $r15 0x770 | |
571 | iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL | |
572 | ret | |
573 | ||
574 | // ctx_86c - not a clue what this is for, except that unless the value is 0x10, the | |
575 | // strand context is saved (and presumably restored) incorrectly.. | |
576 | // Writes the same value to HUB 0x86c (iowr) and to 0x408a14 and 0x41a86c (nv_wr32). | |
577 | // In: $r15 value to set to (0x00/0x10 are used) | |
578 | // Clobbers: $r14; assumes nv_wr32 leaves $r15 intact between the two calls - TODO confirm | |
579 | ctx_86c: | |
580 | mov $r14 0x86c | |
581 | shl b32 $r14 6 | |
582 | iowr I[$r14] $r15 // HUB(0x86c) = val | |
583 | mov $r14 -0x75ec | |
584 | sethi $r14 0x400000 | |
585 | call #nv_wr32 // ROP(0xa14) = val | |
586 | mov $r14 -0x5794 | |
587 | sethi $r14 0x410000 | |
588 | call #nv_wr32 // GPC(0x86c) = val | |
589 | ret | |
590 | ||
591 | // ctx_load - loads a channel's ctxctl data, and selects its vm | |
592 | // Leaves ctx_current updated and the first 256 bytes of the context in chan_data. | |
593 | // In: $r2 channel address | |
594 | // Clobbers: $r1-$r4, $r10 (plus whatever wait_donez and the trace macros use) | |
595 | ctx_load: | |
596 | trace_set(T_CHAN) | |
597 | ||
598 | // switch to channel, somewhat magic in parts.. | |
599 | mov $r10 12 // DONE_UNK12 | |
600 | call #wait_donez | |
601 | mov $r1 0xa24 | |
602 | shl b32 $r1 6 | |
603 | iowr I[$r1 + 0x000] $r0 // 0x409a24 = 0 | |
604 | mov $r3 0xb00 | |
605 | shl b32 $r3 6 | |
606 | iowr I[$r3 + 0x100] $r2 // CHAN_NEXT | |
607 | mov $r1 0xa0c | |
608 | shl b32 $r1 6 | |
609 | mov $r4 7 | |
610 | iowr I[$r1 + 0x000] $r2 // MEM_CHAN | |
611 | iowr I[$r1 + 0x100] $r4 // MEM_CMD = 7 | |
// poll MEM_CMD until its low 5 bits read back as zero
612 | ctx_chan_wait_0: | |
613 | iord $r4 I[$r1 + 0x100] | |
614 | and $r4 0x1f | |
615 | bra ne #ctx_chan_wait_0 | |
616 | iowr I[$r3 + 0x000] $r2 // CHAN_CUR | |
617 | ||
618 | // load channel header, fetch PGRAPH context pointer | |
// MEM_BASE value = (channel with bit 31 cleared) << 4, plus 2
619 | mov $xtargets $r0 | |
620 | bclr $r2 31 | |
621 | shl b32 $r2 4 | |
622 | add b32 $r2 2 | |
623 | ||
624 | trace_set(T_LCHAN) | |
625 | mov $r1 0xa04 | |
626 | shl b32 $r1 6 | |
627 | iowr I[$r1 + 0x000] $r2 // MEM_BASE | |
628 | mov $r1 0xa20 | |
629 | shl b32 $r1 6 | |
630 | mov $r2 0x0002 | |
631 | sethi $r2 0x80000000 | |
632 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram | |
633 | mov $r1 0x10 // chan + 0x0210 | |
634 | mov $r2 #xfer_data | |
635 | sethi $r2 0x00020000 // 16 bytes | |
636 | xdld $r1 $r2 | |
637 | xdwait | |
638 | trace_clr(T_LCHAN) | |
639 | ||
640 | // update current context: ctx_current = (word 1 << 24) | (word 0 >> 8) of the 16 bytes just read | |
641 | ld b32 $r1 D[$r0 + #xfer_data + 4] | |
642 | shl b32 $r1 24 | |
643 | ld b32 $r2 D[$r0 + #xfer_data + 0] | |
644 | shr b32 $r2 8 | |
645 | or $r1 $r2 | |
646 | st b32 D[$r0 + #ctx_current] $r1 | |
647 | ||
648 | // set transfer base to start of context, and fetch context header | |
649 | trace_set(T_LCTXH) | |
650 | mov $r2 0xa04 | |
651 | shl b32 $r2 6 | |
652 | iowr I[$r2 + 0x000] $r1 // MEM_BASE | |
653 | mov $r2 1 | |
654 | mov $r1 0xa20 | |
655 | shl b32 $r1 6 | |
656 | iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm | |
657 | mov $r1 #chan_data | |
658 | sethi $r1 0x00060000 // 256 bytes | |
659 | xdld $r0 $r1 | |
660 | xdwait | |
661 | trace_clr(T_LCTXH) | |
662 | ||
663 | trace_clr(T_CHAN) | |
664 | ret | |
665 | ||
666 | // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as | |
667 | // the active channel for ctxctl, but not actually transfer | |
668 | // any context data. intended for use only during initial | |
669 | // context construction. | |
670 | // Calls ctx_load, then issues MEM_CMD 5 and polls until the register reads zero. | |
671 | // In: $r2 channel address | |
672 | // Clobbers: everything ctx_load clobbers, plus $r1, $r2, $r10 | |
673 | ctx_chan: | |
1978a2f2 BS |
674 | call #ctx_load |
675 | mov $r10 12 // DONE_UNK12 | |
676 | call #wait_donez | |
677 | mov $r1 0xa10 | |
678 | shl b32 $r1 6 | |
679 | mov $r2 5 | |
680 | iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???) | |
681 | ctx_chan_wait: | |
682 | iord $r2 I[$r1 + 0x000] | |
683 | or $r2 $r2 | |
684 | bra ne #ctx_chan_wait | |
1978a2f2 BS |
685 | ret |
686 | ||
687 | // ctx_mmio_exec - execute per-context state overrides list | |
688 | // | |
689 | // Only executed on the first load of a channel. Might want to look into | |
690 | // removing this and having the host directly modify the channel's context | |
691 | // to change this state... The nouveau DRM already builds this list as | |
692 | // it's definitely needed for NVIDIA's, so we may as well use it for now | |
693 | // | |
694 | // Input: $r1 mmio list length, in 8-byte (address, value) entries; must be non-zero because the loop tests it only after decrementing | |
695 | // Clobbers: $r1-$r5, $r14, $r15 | |
696 | ctx_mmio_exec: | |
697 | // set transfer base to be the mmio list | |
698 | ld b32 $r3 D[$r0 + #chan_mmio_address] | |
699 | mov $r2 0xa04 | |
700 | shl b32 $r2 6 | |
701 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | |
702 | ||
// $r3 = byte offset into the list, $r4 = that offset modulo 256
703 | clear b32 $r3 | |
704 | ctx_mmio_loop: | |
705 | // fetch next 256 bytes of mmio list if necessary (whenever the offset crosses a 256-byte boundary) | |
706 | and $r4 $r3 0xff | |
707 | bra ne #ctx_mmio_pull | |
708 | mov $r5 #xfer_data | |
709 | sethi $r5 0x00060000 // 256 bytes | |
710 | xdld $r3 $r5 | |
711 | xdwait | |
712 | ||
713 | // execute a single list entry: first word is the register, second the value | |
714 | ctx_mmio_pull: | |
715 | ld b32 $r14 D[$r4 + #xfer_data + 0x00] | |
716 | ld b32 $r15 D[$r4 + #xfer_data + 0x04] | |
717 | call #nv_wr32 | |
718 | ||
719 | // next! | |
720 | add b32 $r3 8 | |
721 | sub b32 $r1 1 | |
722 | bra ne #ctx_mmio_loop | |
723 | ||
724 | // set transfer base back to the current context ($r2 still holds the MEM_BASE address) | |
725 | ctx_mmio_done: | |
726 | ld b32 $r3 D[$r0 + #ctx_current] | |
727 | iowr I[$r2 + 0x000] $r3 // MEM_BASE | |
728 | ||
729 | // disable the mmio list now, we don't need/want to execute it again: zero the count and write chan_data back with xdst | |
730 | st b32 D[$r0 + #chan_mmio_count] $r0 | |
731 | mov $r1 #chan_data | |
732 | sethi $r1 0x00060000 // 256 bytes | |
733 | xdst $r0 $r1 | |
734 | xdwait | |
735 | ret | |
736 | ||
737 | // ctx_xfer - transfer HUB context data between GPU and storage area | |
738 | // | |
739 | // In: $r2 channel address (only passed on to ctx_load, i.e. on the load path) | |
740 | // $p1 clear on save, set on load | |
741 | // $p2 set if opposite direction done/will be done, so: | |
742 | // on save it means: "a load will follow this save" | |
743 | // on load it means: "a save preceded this load" | |
744 | // Clobbers: $r1, $r2, $r10-$r15 and whatever the called helpers use | |
745 | ctx_xfer: | |
eca15296 BS |
746 | // according to mwk, some kind of wait for idle |
747 | mov $r15 0xc00 | |
748 | shl b32 $r15 6 | |
749 | mov $r14 4 | |
750 | iowr I[$r15 + 0x200] $r14 | |
751 | ctx_xfer_idle: | |
752 | iord $r14 I[$r15 + 0x000] | |
753 | and $r14 0x2000 | |
754 | bra ne #ctx_xfer_idle | |
755 | ||
// pre-op: ctx_86c(0x10) runs on every save and on a load that had no save
// before it; the ctx_xfer_pre_load sequence runs on every load
1978a2f2 BS |
756 | bra not $p1 #ctx_xfer_pre |
757 | bra $p2 #ctx_xfer_pre_load | |
758 | ctx_xfer_pre: | |
759 | mov $r15 0x10 | |
760 | call #ctx_86c | |
1978a2f2 BS |
761 | bra not $p1 #ctx_xfer_exec |
762 | ||
763 | ctx_xfer_pre_load: | |
764 | mov $r15 2 | |
765 | call #ctx_4170s | |
766 | call #ctx_4170w | |
767 | call #ctx_redswitch | |
768 | clear b32 $r15 | |
769 | call #ctx_4170s | |
770 | call #ctx_load | |
771 | ||
772 | // fetch context pointer, and initiate xfer on all GPCs | |
773 | ctx_xfer_exec: | |
774 | ld b32 $r1 D[$r0 + #ctx_current] | |
775 | mov $r2 0x414 | |
776 | shl b32 $r2 6 | |
777 | iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset | |
778 | mov $r14 -0x5b00 | |
779 | sethi $r14 0x410000 | |
780 | mov b32 $r15 $r1 | |
781 | call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer | |
// command value = $p1 | ($p2 << 1); assumes nv_wr32 left $r14 unchanged so
// that adding 4 addresses the next register - TODO confirm
782 | add b32 $r14 4 | |
783 | xbit $r15 $flags $p1 | |
784 | xbit $r2 $flags $p2 | |
785 | shl b32 $r2 1 | |
786 | or $r15 $r2 | |
787 | call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type) | |
788 | ||
789 | // strands | |
790 | mov $r1 0x4afc | |
791 | sethi $r1 0x20000 | |
792 | mov $r2 0xc | |
793 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c | |
794 | call #strand_wait | |
795 | mov $r2 0x47fc | |
796 | sethi $r2 0x20000 | |
797 | iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 | |
798 | xbit $r2 $flags $p1 | |
799 | add b32 $r2 3 | |
800 | iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) | |
801 | ||
802 | // mmio context, using the list bounds that init stored for this chipset | |
803 | xbit $r10 $flags $p1 // direction | |
804 | or $r10 6 // first, last | |
805 | mov $r11 0 // base = 0 | |
806 | ld b32 $r12 D[$r0 + #hub_mmio_list_head] | |
807 | ld b32 $r13 D[$r0 + #hub_mmio_list_tail] | |
808 | mov $r14 0 // not multi | |
809 | call #mmctx_xfer | |
810 | ||
811 | // wait for GPCs to all complete | |
812 | mov $r10 8 // DONE_BAR | |
813 | call #wait_doneo | |
814 | ||
815 | // wait for strand xfer to complete | |
816 | call #strand_wait | |
817 | ||
818 | // post-op: a save first issues MEM_CMD 5 and waits for it; the ctx_xfer_post sequence then runs for every load, and for a save only when no load follows | |
819 | bra $p1 #ctx_xfer_post | |
820 | mov $r10 12 // DONE_UNK12 | |
821 | call #wait_donez | |
822 | mov $r1 0xa10 | |
823 | shl b32 $r1 6 | |
824 | mov $r2 5 | |
825 | iowr I[$r1] $r2 // MEM_CMD | |
826 | ctx_xfer_post_save_wait: | |
827 | iord $r2 I[$r1] | |
828 | or $r2 $r2 | |
829 | bra ne #ctx_xfer_post_save_wait | |
830 | ||
831 | bra $p2 #ctx_xfer_done | |
832 | ctx_xfer_post: | |
833 | mov $r15 2 | |
834 | call #ctx_4170s | |
835 | clear b32 $r15 | |
836 | call #ctx_86c | |
837 | call #strand_post | |
838 | call #ctx_4170w | |
839 | clear b32 $r15 | |
840 | call #ctx_4170s | |
841 | ||
// on a load, run the channel's mmio override list when chan_mmio_count is
// non-zero; the count is handed to ctx_mmio_exec in $r1
842 | bra not $p1 #ctx_xfer_no_post_mmio | |
843 | ld b32 $r1 D[$r0 + #chan_mmio_count] | |
844 | or $r1 $r1 | |
845 | bra e #ctx_xfer_no_post_mmio | |
846 | call #ctx_mmio_exec | |
847 | ||
848 | ctx_xfer_no_post_mmio: | |
1978a2f2 BS |
849 |
850 | ctx_xfer_done: | |
851 | ret | |
852 | ||
853 | .align 256 |