drm/nvf0/gr: initial register/context setup
[deliverable/linux.git] / drivers / gpu / drm / nouveau / core / engine / graph / fuc / hubnve0.fuc
1 /* fuc microcode for nve0 PGRAPH/HUB
2 *
3 * Copyright 2011 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: Ben Skeggs
24 */
25
26 /* To build:
27 * m4 hubnve0.fuc | envyas -a -w -m fuc -V nva3 -o hubnve0.fuc.h
28 */
29
30 .section #nve0_grhub_data
    // Data segment for the HUB microcode. The line below pulls in the
    // shared nve0.fuc file before the HUB-specific variables.
31 include(`nve0.fuc')
    // GPC and ROP unit counts, stored by init from register 0x409604.
32 gpc_count: .b32 0
33 rop_count: .b32 0
    // Command queue shared by the interrupt handler (producer) and the
    // main loop (consumer); its layout comes from a macro in nve0.fuc.
34 cmd_queue: queue_init
    // Data-segment offsets of the mmio context list selected for the
    // detected chipset; written by init, read by ctx_xfer.
35 hub_mmio_list_head: .b32 0
36 hub_mmio_list_tail: .b32 0
37
    // Context pointer of the currently loaded channel, written by ctx_load
    // and used as MEM_BASE / GPC transfer pointer.
38 ctx_current: .b32 0
39
40 .align 256
    // chan_data is the target of the 256-byte context header transfers in
    // ctx_load and ctx_mmio_exec; the two words below are the only header
    // fields named in this file.
41 chan_data:
42 chan_mmio_count: .b32 0
43 chan_mmio_address: .b32 0
44
45 .align 256
    // Scratch buffer for xdld transfers (16 bytes in ctx_load, 256 bytes in
    // ctx_mmio_exec). Only one word is declared here; the .align 256 that
    // follows this block is what reserves the rest of the 256 bytes.
46 xfer_data: .b32 0
47
48 .align 256
    // Chipset lookup table walked by init_find_chipset.
    //
    // Each entry is 8 bytes: a 32-bit chipset id (compared against the
    // value the host placed in CC_SCRATCH[0]) followed by two 16-bit
    // data-segment offsets giving the head and tail of that chipset's
    // mmio context list. A zero id terminates the table.
    // Ids 0xe4, 0xe7 and 0xe6 all share the nve4 list.
49 chipsets:
50 .b8 0xe4 0 0 0
51 .b16 #nve4_hub_mmio_head
52 .b16 #nve4_hub_mmio_tail
53 .b8 0xe7 0 0 0
54 .b16 #nve4_hub_mmio_head
55 .b16 #nve4_hub_mmio_tail
56 .b8 0xe6 0 0 0
57 .b16 #nve4_hub_mmio_head
58 .b16 #nve4_hub_mmio_tail
59 .b8 0xf0 0 0 0
60 .b16 #nvf0_hub_mmio_head
61 .b16 #nvf0_hub_mmio_tail
62 .b8 0 0 0 0
63
64 nve4_hub_mmio_head:
    // HUB mmio context list used for chipset ids 0xe4, 0xe6 and 0xe7 (see
    // the chipsets table). The head and tail labels bracket the list; init
    // stores them and ctx_xfer hands them to the mmio transfer helper.
    // The entry macro lives in nve0.fuc and is not visible here;
    // presumably its arguments are a starting register address and a
    // count of consecutive 32-bit registers - TODO confirm.
65 mmctx_data(0x17e91c, 2)
66 mmctx_data(0x400204, 2)
67 mmctx_data(0x404010, 7)
68 mmctx_data(0x4040a8, 9)
69 mmctx_data(0x4040d0, 7)
70 mmctx_data(0x4040f8, 1)
71 mmctx_data(0x404130, 3)
72 mmctx_data(0x404150, 3)
73 mmctx_data(0x404164, 1)
74 mmctx_data(0x4041a0, 4)
75 mmctx_data(0x404200, 4)
76 mmctx_data(0x404404, 14)
77 mmctx_data(0x404460, 4)
78 mmctx_data(0x404480, 1)
79 mmctx_data(0x404498, 1)
80 mmctx_data(0x404604, 4)
81 mmctx_data(0x404618, 4)
82 mmctx_data(0x40462c, 2)
83 mmctx_data(0x404640, 1)
84 mmctx_data(0x404654, 1)
85 mmctx_data(0x404660, 1)
86 mmctx_data(0x404678, 19)
87 mmctx_data(0x4046c8, 3)
88 mmctx_data(0x404700, 3)
89 mmctx_data(0x404718, 10)
90 mmctx_data(0x404744, 2)
91 mmctx_data(0x404754, 1)
92 mmctx_data(0x405800, 1)
93 mmctx_data(0x405830, 3)
94 mmctx_data(0x405854, 1)
95 mmctx_data(0x405870, 4)
96 mmctx_data(0x405a00, 2)
97 mmctx_data(0x405a18, 1)
98 mmctx_data(0x405b00, 1)
99 mmctx_data(0x405b10, 1)
100 mmctx_data(0x406020, 1)
101 mmctx_data(0x406028, 4)
102 mmctx_data(0x4064a8, 2)
103 mmctx_data(0x4064b4, 2)
104 mmctx_data(0x4064c0, 12)
105 mmctx_data(0x4064fc, 1)
106 mmctx_data(0x407040, 1)
107 mmctx_data(0x407804, 1)
108 mmctx_data(0x40780c, 6)
109 mmctx_data(0x4078bc, 1)
110 mmctx_data(0x408000, 7)
111 mmctx_data(0x408064, 1)
112 mmctx_data(0x408800, 3)
113 mmctx_data(0x408840, 1)
114 mmctx_data(0x408900, 3)
115 mmctx_data(0x408980, 1)
116 nve4_hub_mmio_tail:
117
118 nvf0_hub_mmio_head:
    // HUB mmio context list used for chipset id 0xf0 (see the chipsets
    // table). Same format as the nve4 list above.
    // Differences from the nve4 list, by entry: 0x404004/17 replaces
    // 0x404010/7; 0x404100/10, 0x40417c/2, 0x404438/1 and 0x405b20/1 are
    // added; 0x404404 covers 12 instead of 14; 0x4064a8/5 replaces the
    // 0x4064a8/2 + 0x4064b4/2 pair; 0x407040 is not present.
119 mmctx_data(0x17e91c, 2)
120 mmctx_data(0x400204, 2)
121 mmctx_data(0x404004, 17)
122 mmctx_data(0x4040a8, 9)
123 mmctx_data(0x4040d0, 7)
124 mmctx_data(0x4040f8, 1)
125 mmctx_data(0x404100, 10)
126 mmctx_data(0x404130, 3)
127 mmctx_data(0x404150, 3)
128 mmctx_data(0x404164, 1)
129 mmctx_data(0x40417c, 2)
130 mmctx_data(0x4041a0, 4)
131 mmctx_data(0x404200, 4)
132 mmctx_data(0x404404, 12)
133 mmctx_data(0x404438, 1)
134 mmctx_data(0x404460, 4)
135 mmctx_data(0x404480, 1)
136 mmctx_data(0x404498, 1)
137 mmctx_data(0x404604, 4)
138 mmctx_data(0x404618, 4)
139 mmctx_data(0x40462c, 2)
140 mmctx_data(0x404640, 1)
141 mmctx_data(0x404654, 1)
142 mmctx_data(0x404660, 1)
143 mmctx_data(0x404678, 19)
144 mmctx_data(0x4046c8, 3)
145 mmctx_data(0x404700, 3)
146 mmctx_data(0x404718, 10)
147 mmctx_data(0x404744, 2)
148 mmctx_data(0x404754, 1)
149 mmctx_data(0x405800, 1)
150 mmctx_data(0x405830, 3)
151 mmctx_data(0x405854, 1)
152 mmctx_data(0x405870, 4)
153 mmctx_data(0x405a00, 2)
154 mmctx_data(0x405a18, 1)
155 mmctx_data(0x405b00, 1)
156 mmctx_data(0x405b10, 1)
157 mmctx_data(0x405b20, 1)
158 mmctx_data(0x406020, 1)
159 mmctx_data(0x406028, 4)
160 mmctx_data(0x4064a8, 5)
161 mmctx_data(0x4064c0, 12)
162 mmctx_data(0x4064fc, 1)
163 mmctx_data(0x407804, 1)
164 mmctx_data(0x40780c, 6)
165 mmctx_data(0x4078bc, 1)
166 mmctx_data(0x408000, 7)
167 mmctx_data(0x408064, 1)
168 mmctx_data(0x408800, 3)
169 mmctx_data(0x408840, 1)
170 mmctx_data(0x408900, 3)
171 mmctx_data(0x408980, 1)
172 nvf0_hub_mmio_tail:
173
174 .section #nve0_grhub_code
    // First instruction of the code segment: jump over the shared helper
    // routines pulled in below and start at init.
175 bra #init
    // nve0.fuc is pulled in a second time here, now with the m4 symbol on
    // the next line set; presumably that switches the shared file from
    // emitting its data to emitting its code - TODO confirm in nve0.fuc.
176 define(`include_code')
177 include(`nve0.fuc')
178
179 // reports an exception to the host
180 //
181 // In: $r15 error code (see nve0.fuc)
182 //
    // Writes the code to CC_SCRATCH[5], then raises bit 0 of INTR_UP_SET.
    // $r14 is preserved; $r15 is clobbered (left holding 1).
    //
183 error:
184 push $r14
185 mov $r14 0x814
186 shl b32 $r14 6 // I/O address of register index 0x814
187 iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
188 mov $r14 0xc1c
189 shl b32 $r14 6 // I/O address of register index 0xc1c
190 mov $r15 1
191 iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
192 pop $r14
193 ret
194
195 // HUB fuc initialisation, executed by triggering ucode start, will
196 // fall through to main loop after completion.
197 //
198 // Input:
199 // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
200 //
201 // Output:
202 // CC_SCRATCH[0]:
203 // 31:31: set to signal completion
204 // CC_SCRATCH[1]:
205 // 31:0: total PGRAPH context size
206 //
    // Side effects visible in this file: stores gpc_count, rop_count,
    // hub_mmio_list_head and hub_mmio_list_tail in the data segment, and
    // leaves $r0 == 0, which the rest of the code uses as a zero base
    // for D[] and I[] addressing.
    //
207 init:
208 clear b32 $r0
209 mov $sp $r0
210 mov $xdbase $r0
211
212 // enable fifo access
213 mov $r1 0x1200
214 mov $r2 2
215 iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
216
217 // setup i0 handler, and route all interrupts to it
218 mov $r1 #ih
219 mov $iv0 $r1
220 mov $r1 0x400
221 iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
222
223 // route HUB_CHANNEL_SWITCH to fuc interrupt 8
224 mov $r3 0x404
225 shl b32 $r3 6
226 mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
227 iowr I[$r3 + 0x000] $r2
228
229 // not sure what these are, route them because NVIDIA does, and
230 // the IRQ handler will signal the host if we ever get one.. we
231 // may find out if/why we need to handle these if so..
232 //
233 mov $r2 0x2004
234 iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
235 mov $r2 0x200b
236 iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
237 mov $r2 0x200c
238 iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
239
240 // enable all INTR_UP interrupts
241 mov $r2 0xc24
242 shl b32 $r2 6
243 not b32 $r3 $r0 // $r3 = ~0
244 iowr I[$r2] $r3
245
246 // enable fifo, ctxsw, 9, 10, 15 interrupts
    // (0x8704 = bits 2, 8, 9, 10 and 15; the negative immediate plus the
    // sethi of 0 is how the 16-bit value is built without sign bits)
247 mov $r2 -0x78fc // 0x8704
248 sethi $r2 0
249 iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
250
251 // fifo level triggered, rest edge
252 sub b32 $r1 0x100 // $r1 was 0x400, now 0x300
253 mov $r2 4
254 iowr I[$r1] $r2
255
256 // enable interrupts
257 bset $flags ie0
258
259 // fetch enabled GPC/ROP counts
260 mov $r14 -0x69fc // 0x409604
261 sethi $r14 0x400000
262 call #nv_rd32
263 extr $r1 $r15 16:20 // ROP count = bits 20:16 of the value read
264 st b32 D[$r0 + #rop_count] $r1
265 and $r15 0x1f // GPC count = bits 4:0
266 st b32 D[$r0 + #gpc_count] $r15
267
268 // set BAR_REQMASK to GPC mask
    // mask = (1 << gpc_count) - 1, written to two registers 0x100 apart
269 mov $r1 1
270 shl b32 $r1 $r15
271 sub b32 $r1 1
272 mov $r2 0x40c
273 shl b32 $r2 6
274 iowr I[$r2 + 0x000] $r1
275 iowr I[$r2 + 0x100] $r1
276
277 // find context data for this chipset
    // $r2 = chipset id from the host; $r15 walks the 8-byte entries of the
    // chipsets table until the id matches or the zero terminator is hit
278 mov $r2 0x800
279 shl b32 $r2 6
280 iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
281 mov $r15 #chipsets - 8
282 init_find_chipset:
283 add b32 $r15 8
284 ld b32 $r3 D[$r15 + 0x00]
285 cmpu b32 $r3 $r2
286 bra e #init_context
287 cmpu b32 $r3 0
288 bra ne #init_find_chipset
289 // unknown chipset
    // NOTE(review): init is entered by a branch with $sp set to 0 above,
    // so nothing in this file has pushed a return address for this ret;
    // its effect is not evident from here - confirm intended behaviour.
290 ret
291
292 // context size calculation, reserve first 256 bytes for use by fuc
    // $r1 accumulates the running context size from here on
293 init_context:
294 mov $r1 256
295
296 // calculate size of mmio context data
    // $r14 = list head, $r15 = list tail, taken from the matched table entry
297 ld b16 $r14 D[$r15 + 4]
298 ld b16 $r15 D[$r15 + 6]
299 sethi $r14 0
300 st b32 D[$r0 + #hub_mmio_list_head] $r14
301 st b32 D[$r0 + #hub_mmio_list_tail] $r15
302 call #mmctx_size // helper from nve0.fuc; result is used below as a byte size in $r15
303
304 // set mmctx base addresses now so we don't have to do it later,
305 // they don't (currently) ever change
306 mov $r3 0x700
307 shl b32 $r3 6
308 shr b32 $r4 $r1 8 // current offset in 256-byte units
309 iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
310 iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
311 add b32 $r3 0x1300
312 add b32 $r1 $r15 // account for the mmio context data
313 shr b32 $r15 2 // bytes -> 32-bit words
314 iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
315
316 // strands, base offset needs to be aligned to 256 bytes
    // note: this always advances to the next 256-byte boundary, even when
    // the offset is already aligned
317 shr b32 $r1 8
318 add b32 $r1 1
319 shl b32 $r1 8
320 mov b32 $r15 $r1
321 call #strand_ctx_init // helper from nve0.fuc; $r15 is added to the size afterwards
322 add b32 $r1 $r15
323
324 // initialise each GPC in sequence by passing in the offset of its
325 // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
326 // has previously been uploaded by the host) running.
327 //
328 // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
329 // when it has completed, and return the size of its context data
330 // in GPCn_CC_SCRATCH[1]
331 //
    // $r3 = GPCs left to start, $r4 = register base of the current GPC
    // (0x502000 for the first, stepping by 0x8000), $r2 = chipset id
332 ld b32 $r3 D[$r0 + #gpc_count]
333 mov $r4 0x2000
334 sethi $r4 0x500000
335 init_gpc:
336 // setup, and start GPC ucode running
337 add b32 $r14 $r4 0x804
338 mov b32 $r15 $r1
339 call #nv_wr32 // CC_SCRATCH[1] = ctx offset
340 add b32 $r14 $r4 0x800
341 mov b32 $r15 $r2
342 call #nv_wr32 // CC_SCRATCH[0] = chipset
343 add b32 $r14 $r4 0x10c
344 clear b32 $r15
345 call #nv_wr32
    // the next write reuses $r15; assumes the write helper leaves it
    // unchanged (still 0) - TODO confirm in nve0.fuc
346 add b32 $r14 $r4 0x104
347 call #nv_wr32 // ENTRY
348 add b32 $r14 $r4 0x100
349 mov $r15 2 // CTRL_START_TRIGGER
350 call #nv_wr32 // CTRL
351
352 // wait for it to complete, and adjust context size
353 add b32 $r14 $r4 0x800
354 init_gpc_wait:
355 call #nv_rd32
356 xbit $r15 $r15 31
357 bra e #init_gpc_wait // loop while bit 31 is still clear
358 add b32 $r14 $r4 0x804
359 call #nv_rd32
360 add b32 $r1 $r15 // add this GPC's reported context size
361
362 // next!
363 add b32 $r4 0x8000
364 sub b32 $r3 1
365 bra ne #init_gpc
366
367 // save context size, and tell host we're ready
368 mov $r2 0x800
369 shl b32 $r2 6
370 iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
371 add b32 $r2 0x800 // same I/O address main_done uses (0x820 << 6)
372 clear b32 $r1
373 bset $r1 31
374 iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
375
376 // Main program loop, very simple, sleeps until woken up by the interrupt
377 // handler, pulls a command from the queue and executes its handler
378 //
    // Commands handled here (value in $r14 after the queue read):
    //   0x4001  context switch request, queued by the interrupt handler
    //   0x0001  set current channel, channel address taken from $r15
    //   0x0002  save current channel context
    // Anything else is reported to the host through the error routine.
    // Never returns.
    //
379 main:
380 // sleep until we have something to do
    // $p0 is set here and cleared at the end of the interrupt handler
381 bset $flags $p0
382 sleep $p0
383 mov $r13 #cmd_queue
384 call #queue_get
    // helper from nve0.fuc; presumably $p1 set means the queue was empty
    // and $r14/$r15 carry command/data otherwise - TODO confirm
385 bra $p1 #main
386
387 // context switch, requested by GPU?
388 cmpu b32 $r14 0x4001
389 bra ne #main_not_ctx_switch
390 trace_set(T_AUTO)
391 mov $r1 0xb00
392 shl b32 $r1 6
393 iord $r2 I[$r1 + 0x100] // CHAN_NEXT
394 iord $r1 I[$r1 + 0x000] // CHAN_CUR
395
    // bit 31 of each channel value is tested to decide which of the
    // save and load halves are needed
396 xbit $r3 $r1 31
397 bra e #chsw_no_prev
398 xbit $r3 $r2 31
399 bra e #chsw_prev_no_next
    // both set: save current ($p1 clear, $p2 set), then load next
    // ($p1 set, $p2 still set)
400 push $r2
401 mov b32 $r2 $r1
402 trace_set(T_SAVE)
403 bclr $flags $p1
404 bset $flags $p2
405 call #ctx_xfer
406 trace_clr(T_SAVE);
407 pop $r2
408 trace_set(T_LOAD);
409 bset $flags $p1
410 call #ctx_xfer
411 trace_clr(T_LOAD);
412 bra #chsw_done
413 chsw_prev_no_next:
    // only current set: save it with no load to follow, then write the
    // next channel value into CHAN_CUR
414 push $r2
415 mov b32 $r2 $r1
416 bclr $flags $p1
417 bclr $flags $p2
418 call #ctx_xfer
419 pop $r2
420 mov $r1 0xb00
421 shl b32 $r1 6
422 iowr I[$r1] $r2
423 bra #chsw_done
424 chsw_no_prev:
    // current not set: load next if its bit 31 is set, else nothing to do
425 xbit $r3 $r2 31
426 bra e #chsw_done
427 bset $flags $p1
428 bclr $flags $p2
429 call #ctx_xfer
430
431 // ack the context switch request
432 chsw_done:
433 mov $r1 0xb0c
434 shl b32 $r1 6
435 mov $r2 1
436 iowr I[$r1 + 0x000] $r2 // 0x409b0c
437 trace_clr(T_AUTO)
438 bra #main
439
440 // request to set current channel? (*not* a context switch)
441 main_not_ctx_switch:
442 cmpu b32 $r14 0x0001
443 bra ne #main_not_ctx_chan
444 mov b32 $r2 $r15 // command data is the channel address
445 call #ctx_chan
446 bra #main_done
447
448 // request to store current channel context?
449 main_not_ctx_chan:
450 cmpu b32 $r14 0x0002
451 bra ne #main_not_ctx_save
452 trace_set(T_SAVE)
453 bclr $flags $p1
454 bclr $flags $p2
455 call #ctx_xfer
456 trace_clr(T_SAVE)
457 bra #main_done
458
    // unknown command: error code is the command in the upper 16 bits
    // OR-ed with the bad-command constant; note this path goes straight
    // back to the loop without signalling completion
459 main_not_ctx_save:
460 shl b32 $r15 $r14 16
461 or $r15 E_BAD_COMMAND
462 call #error
463 bra #main
464
    // signal completion of a host-issued command
465 main_done:
466 mov $r1 0x820
467 shl b32 $r1 6
468 clear b32 $r2
469 bset $r2 31
470 iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
471 bra #main
472
473 // interrupt handler
    //
    // Reads the pending interrupt bits into $r10 and then:
    //   bit 2 (0x004): queues the incoming fifo command and data, acks fifo
    //   bit 8 (0x100): queues command 0x4001 (context switch) for main
    //   any other bit: forwards those bits to the host via INTR_UP_SET
    // Finally writes the pending bits to INTR_ACK and clears $p0 so that
    // the main loop wakes from its sleep.
    //
    // Saves and restores $flags and $r8-$r11, $r13-$r15. $r0 is assumed to
    // be zero, as set up by init.
    //
474 ih:
475 push $r8
476 mov $r8 $flags
477 push $r8
478 push $r9
479 push $r10
480 push $r11
481 push $r13
482 push $r14
483 push $r15
484
485 // incoming fifo command?
486 iord $r10 I[$r0 + 0x200] // INTR
487 and $r11 $r10 0x00000004
488 bra e #ih_no_fifo
489 // queue incoming fifo command for later processing
490 mov $r11 0x1900
491 mov $r13 #cmd_queue
492 iord $r14 I[$r11 + 0x100] // FIFO_CMD
493 iord $r15 I[$r11 + 0x000] // FIFO_DATA
494 call #queue_put
495 add b32 $r11 0x400
496 mov $r14 1
497 iowr I[$r11 + 0x000] $r14 // FIFO_ACK
498
499 // context switch request?
500 ih_no_fifo:
501 and $r11 $r10 0x00000100
502 bra e #ih_no_ctxsw
503 // enqueue a context switch for later processing
504 mov $r13 #cmd_queue
505 mov $r14 0x4001
506 call #queue_put
507
508 // anything we didn't handle, bring it to the host's attention
509 ih_no_ctxsw:
510 mov $r11 0x104
511 not b32 $r11
512 and $r11 $r10 $r11
513 bra e #ih_no_other
    // build the register address in $r9, not $r10: $r10 still holds the
    // pending interrupt bits and is written to INTR_ACK below. $r9 is
    // saved and restored by this handler and is not otherwise read here.
514 mov $r9 0xc1c
515 shl b32 $r9 6
516 iowr I[$r9] $r11 // INTR_UP_SET
517
518 // ack, and wake up main()
519 ih_no_other:
520 iowr I[$r0 + 0x100] $r10 // INTR_ACK
521
522 pop $r15
523 pop $r14
524 pop $r13
525 pop $r11
526 pop $r10
527 pop $r9
528 pop $r8
529 mov $flags $r8
530 pop $r8
531 bclr $flags $p0
532 iret
533
534 // Again, not real sure
535 //
536 // In: $r15 value to set 0x404170 to
537 //
    // Bit 4 (0x10) is OR-ed into the value before it is written; ctx_4170w
    // polls that same bit. Clobbers $r14 and $r15.
    //
538 ctx_4170s:
539 mov $r14 0x4170
540 sethi $r14 0x400000 // $r14 = 0x404170
541 or $r15 0x10
542 call #nv_wr32
543 ret
544
545 // Waits for a ctx_4170s() call to complete
546 //
    // Re-reads register 0x404170 until bit 4 (0x10) reads back as zero.
    // There is no timeout. Clobbers $r14 and $r15.
    //
547 ctx_4170w:
548 mov $r14 0x4170
549 sethi $r14 0x400000 // $r14 = 0x404170
550 call #nv_rd32
551 and $r15 0x10
552 bra ne #ctx_4170w
553 ret
554
555 // Disables various things, waits a bit, and re-enables them..
556 //
557 // Not sure how exactly this helps, perhaps "ENABLE" is not such a
558 // good description for the bits we turn off? Anyways, without this,
559 // funny things happen.
560 //
    // Writes 0x270 to HUB_RED_SWITCH, spins for 8 loop iterations, then
    // writes 0x770. Takes no arguments; clobbers $r14 and $r15.
    //
561 ctx_redswitch:
562 mov $r14 0x614
563 shl b32 $r14 6
564 mov $r15 0x270
565 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
566 mov $r15 8 // delay loop counter
567 ctx_redswitch_delay:
568 sub b32 $r15 1
569 bra ne #ctx_redswitch_delay
570 mov $r15 0x770
571 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
572 ret
573
574 // Not a clue what this is for, except that unless the value is 0x10, the
575 // strand context is saved (and presumably restored) incorrectly..
576 //
577 // In: $r15 value to set to (0x00/0x10 are used)
578 //
    // The same value is written to three places: the HUB I/O register at
    // index 0x86c, then registers 0x408a14 and 0x41a86c through the write
    // helper. Clobbers $r14. The second helper call reuses $r15, so this
    // assumes the helper leaves $r15 unchanged - TODO confirm in nve0.fuc.
    //
579 ctx_86c:
580 mov $r14 0x86c
581 shl b32 $r14 6
582 iowr I[$r14] $r15 // HUB(0x86c) = val
583 mov $r14 -0x75ec // low half 0x8a14
584 sethi $r14 0x400000 // $r14 = 0x408a14
585 call #nv_wr32 // ROP(0xa14) = val
586 mov $r14 -0x5794 // low half 0xa86c
587 sethi $r14 0x410000 // $r14 = 0x41a86c
588 call #nv_wr32 // GPC(0x86c) = val
589 ret
590
591 // ctx_load - loads a channel's ctxctl data, and selects its vm
592 //
593 // In: $r2 channel address
594 //
    // On return ctx_current holds the channel's context pointer and the
    // 256 bytes at chan_data hold the start of that context, which covers
    // chan_mmio_count and chan_mmio_address.
    // Clobbers $r1-$r4 and $r10, plus whatever the wait helper clobbers.
    //
595 ctx_load:
596 trace_set(T_CHAN)
597
598 // switch to channel, somewhat magic in parts..
599 mov $r10 12 // DONE_UNK12
600 call #wait_donez
601 mov $r1 0xa24
602 shl b32 $r1 6
603 iowr I[$r1 + 0x000] $r0 // 0x409a24
604 mov $r3 0xb00
605 shl b32 $r3 6
606 iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
607 mov $r1 0xa0c
608 shl b32 $r1 6
609 mov $r4 7
610 iowr I[$r1 + 0x000] $r2 // MEM_CHAN
611 iowr I[$r1 + 0x100] $r4 // MEM_CMD
    // poll MEM_CMD until its low 5 bits read back as zero
612 ctx_chan_wait_0:
613 iord $r4 I[$r1 + 0x100]
614 and $r4 0x1f
615 bra ne #ctx_chan_wait_0
616 iowr I[$r3 + 0x000] $r2 // CHAN_CUR
617
618 // load channel header, fetch PGRAPH context pointer
    // MEM_BASE = (channel address with bit 31 cleared) * 16 + 2; together
    // with the 0x10 transfer offset below this reaches chan + 0x0210
619 mov $xtargets $r0
620 bclr $r2 31
621 shl b32 $r2 4
622 add b32 $r2 2
623
624 trace_set(T_LCHAN)
625 mov $r1 0xa04
626 shl b32 $r1 6
627 iowr I[$r1 + 0x000] $r2 // MEM_BASE
628 mov $r1 0xa20
629 shl b32 $r1 6
630 mov $r2 0x0002
631 sethi $r2 0x80000000
632 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
633 mov $r1 0x10 // chan + 0x0210
634 mov $r2 #xfer_data
635 sethi $r2 0x00020000 // 16 bytes
636 xdld $r1 $r2
637 xdwait
638 trace_clr(T_LCHAN)
639
640 // update current context
    // ctx_current = (second word << 24) | (first word >> 8)
641 ld b32 $r1 D[$r0 + #xfer_data + 4]
642 shl b32 $r1 24
643 ld b32 $r2 D[$r0 + #xfer_data + 0]
644 shr b32 $r2 8
645 or $r1 $r2
646 st b32 D[$r0 + #ctx_current] $r1
647
648 // set transfer base to start of context, and fetch context header
649 trace_set(T_LCTXH)
650 mov $r2 0xa04
651 shl b32 $r2 6
652 iowr I[$r2 + 0x000] $r1 // MEM_BASE
653 mov $r2 1
654 mov $r1 0xa20
655 shl b32 $r1 6
656 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
657 mov $r1 #chan_data
658 sethi $r1 0x00060000 // 256 bytes
659 xdld $r0 $r1 // offset 0 from MEM_BASE into chan_data
660 xdwait
661 trace_clr(T_LCTXH)
662
663 trace_clr(T_CHAN)
664 ret
665
666 // ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
667 // the active channel for ctxctl, but not actually transfer
668 // any context data. intended for use only during initial
669 // context construction.
670 //
671 // In: $r2 channel address
672 //
    // Runs ctx_load, then issues MEM_CMD 5 and polls until the register
    // reads back as zero. Clobbers the registers ctx_load clobbers.
    //
673 ctx_chan:
674 call #ctx_load
675 mov $r10 12 // DONE_UNK12
676 call #wait_donez
677 mov $r1 0xa10
678 shl b32 $r1 6
679 mov $r2 5
680 iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
681 ctx_chan_wait:
682 iord $r2 I[$r1 + 0x000]
683 or $r2 $r2 // sets the flags from $r2 for the zero test
684 bra ne #ctx_chan_wait
685 ret
686
687 // Execute per-context state overrides list
688 //
689 // Only executed on the first load of a channel. Might want to look into
690 // removing this and having the host directly modify the channel's context
691 // to change this state... The nouveau DRM already builds this list as
692 // it's definitely needed for NVIDIA's, so we may as well use it for now
693 //
694 // Input: $r1 mmio list length
695 //
    // Each list entry is 8 bytes: a register address followed by the value
    // to write. The list is fetched 256 bytes (32 entries) at a time into
    // xfer_data. $r1 must be non-zero on entry; the only caller in this
    // file checks that first. Afterwards chan_mmio_count is zeroed and
    // the 256-byte chan_data block is written back to the context.
    // Clobbers $r1-$r5, $r14 and $r15.
    //
696 ctx_mmio_exec:
697 // set transfer base to be the mmio list
698 ld b32 $r3 D[$r0 + #chan_mmio_address]
699 mov $r2 0xa04
700 shl b32 $r2 6
701 iowr I[$r2 + 0x000] $r3 // MEM_BASE
702
703 clear b32 $r3 // $r3 = byte offset into the list
704 ctx_mmio_loop:
705 // fetch next 256 bytes of mmio list if necessary
706 and $r4 $r3 0xff // $r4 = offset within the current 256-byte chunk
707 bra ne #ctx_mmio_pull
708 mov $r5 #xfer_data
709 sethi $r5 0x00060000 // 256 bytes
710 xdld $r3 $r5
711 xdwait
712
713 // execute a single list entry
714 ctx_mmio_pull:
715 ld b32 $r14 D[$r4 + #xfer_data + 0x00]
716 ld b32 $r15 D[$r4 + #xfer_data + 0x04]
717 call #nv_wr32
718
719 // next!
720 add b32 $r3 8
721 sub b32 $r1 1
722 bra ne #ctx_mmio_loop
723
724 // set transfer base back to the current context
725 ctx_mmio_done:
726 ld b32 $r3 D[$r0 + #ctx_current]
727 iowr I[$r2 + 0x000] $r3 // MEM_BASE
728
729 // disable the mmio list now, we don't need/want to execute it again
730 st b32 D[$r0 + #chan_mmio_count] $r0
731 mov $r1 #chan_data
732 sethi $r1 0x00060000 // 256 bytes
733 xdst $r0 $r1
734 xdwait
735 ret
736
737 // Transfer HUB context data between GPU and storage area
738 //
739 // In: $r2 channel address
740 // $p1 clear on save, set on load
741 // $p2 set if opposite direction done/will be done, so:
742 // on save it means: "a load will follow this save"
743 // on load it means: "a save preceded this load"
744 //
    // $r2 is only consumed on the load path, where it is passed to
    // ctx_load. Path summary as coded below:
    //   save:               pre, exec, MEM_CMD 5 wait, then post unless
    //                       $p2 is set
    //   load, $p2 clear:    pre, pre_load, exec, post, optional mmio list
    //   load, $p2 set:      pre_load, exec, post, optional mmio list
    //
745 ctx_xfer:
746 // according to mwk, some kind of wait for idle
747 mov $r15 0xc00
748 shl b32 $r15 6
749 mov $r14 4
750 iowr I[$r15 + 0x200] $r14
751 ctx_xfer_idle:
752 iord $r14 I[$r15 + 0x000]
753 and $r14 0x2000
754 bra ne #ctx_xfer_idle // spin while bit 13 is set
755
756 bra not $p1 #ctx_xfer_pre
757 bra $p2 #ctx_xfer_pre_load
758 ctx_xfer_pre:
759 mov $r15 0x10
760 call #ctx_86c
761 bra not $p1 #ctx_xfer_exec
762
    // load only: reset sequence, then switch to the new channel
763 ctx_xfer_pre_load:
764 mov $r15 2
765 call #ctx_4170s
766 call #ctx_4170w
767 call #ctx_redswitch
768 clear b32 $r15
769 call #ctx_4170s
770 call #ctx_load
771
772 // fetch context pointer, and initiate xfer on all GPCs
773 ctx_xfer_exec:
774 ld b32 $r1 D[$r0 + #ctx_current]
775 mov $r2 0x414
776 shl b32 $r2 6
777 iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
778 mov $r14 -0x5b00 // low half 0xa500
779 sethi $r14 0x410000 // $r14 = 0x41a500
780 mov b32 $r15 $r1
781 call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
782 add b32 $r14 4
    // transfer type = $p1 | ($p2 << 1)
783 xbit $r15 $flags $p1
784 xbit $r2 $flags $p2
785 shl b32 $r2 1
786 or $r15 $r2
787 call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
788
789 // strands
790 mov $r1 0x4afc
791 sethi $r1 0x20000
792 mov $r2 0xc
793 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
794 call #strand_wait
795 mov $r2 0x47fc
796 sethi $r2 0x20000
797 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
798 xbit $r2 $flags $p1
799 add b32 $r2 3 // 3 when saving, 4 when loading
800 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
801
802 // mmio context
    // arguments for the mmio transfer helper in nve0.fuc
803 xbit $r10 $flags $p1 // direction
804 or $r10 6 // first, last
805 mov $r11 0 // base = 0
806 ld b32 $r12 D[$r0 + #hub_mmio_list_head]
807 ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
808 mov $r14 0 // not multi
809 call #mmctx_xfer
810
811 // wait for GPCs to all complete
812 mov $r10 8 // DONE_BAR
813 call #wait_doneo
814
815 // wait for strand xfer to complete
816 call #strand_wait
817
818 // post-op
    // loads skip straight to the post sequence; saves first issue
    // MEM_CMD 5 and wait for the register to read back as zero
819 bra $p1 #ctx_xfer_post
820 mov $r10 12 // DONE_UNK12
821 call #wait_donez
822 mov $r1 0xa10
823 shl b32 $r1 6
824 mov $r2 5
825 iowr I[$r1] $r2 // MEM_CMD
826 ctx_xfer_post_save_wait:
827 iord $r2 I[$r1]
828 or $r2 $r2
829 bra ne #ctx_xfer_post_save_wait
830
    // a save that will be followed by a load is finished here
831 bra $p2 #ctx_xfer_done
832 ctx_xfer_post:
833 mov $r15 2
834 call #ctx_4170s
835 clear b32 $r15
836 call #ctx_86c
837 call #strand_post
838 call #ctx_4170w
839 clear b32 $r15
840 call #ctx_4170s
841
    // on load only: run the per-context mmio list if its count is non-zero
842 bra not $p1 #ctx_xfer_no_post_mmio
843 ld b32 $r1 D[$r0 + #chan_mmio_count]
844 or $r1 $r1
845 bra e #ctx_xfer_no_post_mmio
846 call #ctx_mmio_exec
847
848 ctx_xfer_no_post_mmio:
849
850 ctx_xfer_done:
851 ret
852
853 .align 256
This page took 0.087806 seconds and 5 git commands to generate.