drm/nvf0/gr: initial register/context setup
[deliverable/linux.git] / drivers / gpu / drm / nouveau / core / engine / graph / fuc / hubnve0.fuc
CommitLineData
1978a2f2
BS
1/* fuc microcode for nve0 PGRAPH/HUB
2 *
3 * Copyright 2011 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: Ben Skeggs
24 */
25
26/* To build:
27 * m4 hubnve0.fuc | envyas -a -w -m fuc -V nva3 -o hubnve0.fuc.h
28 */
29
30.section #nve0_grhub_data
// HUB data segment.  nve0.fuc is pulled in here and a second time from the
// code section further down.
31include(`nve0.fuc')
32gpc_count: .b32 0 // set by init: low 5 bits of the value read at 0x409604
33rop_count: .b32 0 // set by init: bits 20:16 of that same value
34cmd_queue: queue_init // ih adds entries with queue_put, main removes them with queue_get
35hub_mmio_list_head: .b32 0 // set by init from the matching chipsets entry
36hub_mmio_list_tail: .b32 0 // set by init from the matching chipsets entry
37
38ctx_current: .b32 0 // context pointer computed by ctx_load, later reused as MEM_BASE
39
cb1e06e0
BS
40.align 256
// 256-byte area read by ctx_load (xdld) and written back by ctx_mmio_exec (xdst)
41chan_data:
42chan_mmio_count: .b32 0 // when non-zero, ctx_xfer runs ctx_mmio_exec after a load
43chan_mmio_address: .b32 0 // written to MEM_BASE by ctx_mmio_exec
44
45.align 256
// transfer buffer: ctx_load reads 16 bytes into it, ctx_mmio_exec 256 bytes
46xfer_data: .b32 0
47
48.align 256
1978a2f2
BS
// Chipset lookup table walked by init in 8-byte steps.  Each entry is a
// 4-byte chipset id (compared as a b32 against the value read from
// CC_SCRATCH[0]) followed by two 16-bit values read at +4 and +6, the head
// and tail of that chipset's hub mmio list.  An id of zero ends the table.
49chipsets:
50.b8 0xe4 0 0 0
51.b16 #nve4_hub_mmio_head
52.b16 #nve4_hub_mmio_tail
53.b8 0xe7 0 0 0
54.b16 #nve4_hub_mmio_head
55.b16 #nve4_hub_mmio_tail
eca15296
BS
56.b8 0xe6 0 0 0
57.b16 #nve4_hub_mmio_head
58.b16 #nve4_hub_mmio_tail
cb1e06e0
BS
59.b8 0xf0 0 0 0
60.b16 #nvf0_hub_mmio_head
61.b16 #nvf0_hub_mmio_tail
1978a2f2
BS
// terminator: init stops searching when it reads an id of zero
62.b8 0 0 0 0
63
// Hub mmio context list referenced by the 0xe4, 0xe7 and 0xe6 entries of
// the chipsets table.  init hands head/tail to mmctx_size and ctx_xfer hands
// them to mmctx_xfer.  Each entry presumably gives a first register address
// and a register count; the macro lives in nve0.fuc - TODO confirm there.
64nve4_hub_mmio_head:
65mmctx_data(0x17e91c, 2)
66mmctx_data(0x400204, 2)
67mmctx_data(0x404010, 7)
68mmctx_data(0x4040a8, 9)
69mmctx_data(0x4040d0, 7)
70mmctx_data(0x4040f8, 1)
71mmctx_data(0x404130, 3)
72mmctx_data(0x404150, 3)
73mmctx_data(0x404164, 1)
74mmctx_data(0x4041a0, 4)
75mmctx_data(0x404200, 4)
76mmctx_data(0x404404, 14)
77mmctx_data(0x404460, 4)
78mmctx_data(0x404480, 1)
79mmctx_data(0x404498, 1)
80mmctx_data(0x404604, 4)
81mmctx_data(0x404618, 4)
82mmctx_data(0x40462c, 2)
83mmctx_data(0x404640, 1)
84mmctx_data(0x404654, 1)
85mmctx_data(0x404660, 1)
86mmctx_data(0x404678, 19)
87mmctx_data(0x4046c8, 3)
88mmctx_data(0x404700, 3)
89mmctx_data(0x404718, 10)
90mmctx_data(0x404744, 2)
91mmctx_data(0x404754, 1)
92mmctx_data(0x405800, 1)
93mmctx_data(0x405830, 3)
94mmctx_data(0x405854, 1)
95mmctx_data(0x405870, 4)
96mmctx_data(0x405a00, 2)
97mmctx_data(0x405a18, 1)
98mmctx_data(0x405b00, 1)
99mmctx_data(0x405b10, 1)
100mmctx_data(0x406020, 1)
101mmctx_data(0x406028, 4)
102mmctx_data(0x4064a8, 2)
103mmctx_data(0x4064b4, 2)
104mmctx_data(0x4064c0, 12)
105mmctx_data(0x4064fc, 1)
106mmctx_data(0x407040, 1)
107mmctx_data(0x407804, 1)
108mmctx_data(0x40780c, 6)
109mmctx_data(0x4078bc, 1)
110mmctx_data(0x408000, 7)
111mmctx_data(0x408064, 1)
112mmctx_data(0x408800, 3)
113mmctx_data(0x408840, 1)
114mmctx_data(0x408900, 3)
115mmctx_data(0x408980, 1)
// end marker: this address is the tail value stored in the chipsets table
116nve4_hub_mmio_tail:
117
cb1e06e0
BS
// Hub mmio context list referenced by the 0xf0 entry of the chipsets table.
// Same layout as the nve4 list; init hands head/tail to mmctx_size and
// ctx_xfer hands them to mmctx_xfer.
118nvf0_hub_mmio_head:
119mmctx_data(0x17e91c, 2)
120mmctx_data(0x400204, 2)
121mmctx_data(0x404004, 17)
122mmctx_data(0x4040a8, 9)
123mmctx_data(0x4040d0, 7)
124mmctx_data(0x4040f8, 1)
125mmctx_data(0x404100, 10)
126mmctx_data(0x404130, 3)
127mmctx_data(0x404150, 3)
128mmctx_data(0x404164, 1)
129mmctx_data(0x40417c, 2)
130mmctx_data(0x4041a0, 4)
131mmctx_data(0x404200, 4)
132mmctx_data(0x404404, 12)
133mmctx_data(0x404438, 1)
134mmctx_data(0x404460, 4)
135mmctx_data(0x404480, 1)
136mmctx_data(0x404498, 1)
137mmctx_data(0x404604, 4)
138mmctx_data(0x404618, 4)
139mmctx_data(0x40462c, 2)
140mmctx_data(0x404640, 1)
141mmctx_data(0x404654, 1)
142mmctx_data(0x404660, 1)
143mmctx_data(0x404678, 19)
144mmctx_data(0x4046c8, 3)
145mmctx_data(0x404700, 3)
146mmctx_data(0x404718, 10)
147mmctx_data(0x404744, 2)
148mmctx_data(0x404754, 1)
149mmctx_data(0x405800, 1)
150mmctx_data(0x405830, 3)
151mmctx_data(0x405854, 1)
152mmctx_data(0x405870, 4)
153mmctx_data(0x405a00, 2)
154mmctx_data(0x405a18, 1)
155mmctx_data(0x405b00, 1)
156mmctx_data(0x405b10, 1)
157mmctx_data(0x405b20, 1)
158mmctx_data(0x406020, 1)
159mmctx_data(0x406028, 4)
160mmctx_data(0x4064a8, 5)
161mmctx_data(0x4064c0, 12)
162mmctx_data(0x4064fc, 1)
163mmctx_data(0x407804, 1)
164mmctx_data(0x40780c, 6)
165mmctx_data(0x4078bc, 1)
166mmctx_data(0x408000, 7)
167mmctx_data(0x408064, 1)
168mmctx_data(0x408800, 3)
169mmctx_data(0x408840, 1)
170mmctx_data(0x408900, 3)
171mmctx_data(0x408980, 1)
// end marker: this address is the tail value stored in the chipsets table
172nvf0_hub_mmio_tail:
1978a2f2
BS
173
174.section #nve0_grhub_code
// First instruction of the code segment: jump over the shared helper code
// that follows and start at init.
175bra #init
// nve0.fuc is pulled in a second time, now with this symbol defined;
// presumably that selects its code part rather than its data part - TODO
// confirm in nve0.fuc.
176define(`include_code')
177include(`nve0.fuc')
178
179// reports an exception to the host
180//
181// In: $r15 error code (see nve0.fuc)
182//
// Stores the error code in CC_SCRATCH[5] and then writes 1 to INTR_UP_SET.
// $r14 is saved and restored; $r15 holds 1 on return, not the error code.
//
183error:
184 push $r14
185 mov $r14 0x814
186 shl b32 $r14 6
187 iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
188 mov $r14 0xc1c
189 shl b32 $r14 6
190 mov $r15 1
191 iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
192 pop $r14
193 ret
194
195// HUB fuc initialisation, executed by triggering ucode start, will
196// fall through to main loop after completion.
197//
198// Input:
199// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
200//
201// Output:
202// CC_SCRATCH[0]:
203// 31:31: set to signal completion
204// CC_SCRATCH[1]:
205// 31:0: total PGRAPH context size
206//
// Register roles inside this routine:
//   $r0  zero for the whole program (cleared first, used as a zero base)
//   $r1  running context size from init_context onwards
//   $r2  chipset id from the CC_SCRATCH[0] read until the GPC loop is done
//   $r3  number of GPCs still to initialise, $r4 mmio base of the current GPC
//
207init:
208 clear b32 $r0
209 mov $sp $r0
210 mov $xdbase $r0
211
212 // enable fifo access
213 mov $r1 0x1200
214 mov $r2 2
215 iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
216
217 // setup i0 handler, and route all interrupts to it
218 mov $r1 #ih
219 mov $iv0 $r1
220 mov $r1 0x400
221 iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
222
223 // route HUB_CHANNEL_SWITCH to fuc interrupt 8
224 mov $r3 0x404
225 shl b32 $r3 6
226 mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
227 iowr I[$r3 + 0x000] $r2
228
229 // not sure what these are, route them because NVIDIA does, and
230 // the IRQ handler will signal the host if we ever get one.. we
231 // may find out if/why we need to handle these if so..
232 //
233 mov $r2 0x2004
234 iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
235 mov $r2 0x200b
236 iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
237 mov $r2 0x200c
238 iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
239
240 // enable all INTR_UP interrupts
241 mov $r2 0xc24
242 shl b32 $r2 6
243 not b32 $r3 $r0
244 iowr I[$r2] $r3
245
246 // enable fifo, ctxsw, 9, 10, 15 interrupts
 // ($r1 still holds 0x400 from the INTR_DISPATCH setup above)
247 mov $r2 -0x78fc // 0x8704
248 sethi $r2 0
249 iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
250
251 // fifo level triggered, rest edge
252 sub b32 $r1 0x100
253 mov $r2 4
254 iowr I[$r1] $r2
255
256 // enable interrupts
257 bset $flags ie0
258
259 // fetch enabled GPC/ROP counts
260 mov $r14 -0x69fc // 0x409604
261 sethi $r14 0x400000
262 call #nv_rd32
263 extr $r1 $r15 16:20
264 st b32 D[$r0 + #rop_count] $r1
265 and $r15 0x1f
266 st b32 D[$r0 + #gpc_count] $r15
267
268 // set BAR_REQMASK to GPC mask
 // mask = (1 << gpc_count) - 1, written at two offsets (+0x000, +0x100)
269 mov $r1 1
270 shl b32 $r1 $r15
271 sub b32 $r1 1
272 mov $r2 0x40c
273 shl b32 $r2 6
274 iowr I[$r2 + 0x000] $r1
275 iowr I[$r2 + 0x100] $r1
276
277 // find context data for this chipset
 // $r15 walks the chipsets table in 8-byte steps until the id matches
 // $r2 or a zero id is read
278 mov $r2 0x800
279 shl b32 $r2 6
280 iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
281 mov $r15 #chipsets - 8
282 init_find_chipset:
283 add b32 $r15 8
284 ld b32 $r3 D[$r15 + 0x00]
285 cmpu b32 $r3 $r2
286 bra e #init_context
287 cmpu b32 $r3 0
288 bra ne #init_find_chipset
289 // unknown chipset
 // NOTE(review): init is entered with a bra and $sp was zeroed above, so
 // no return address has been pushed when this ret executes, and the host
 // is never signalled - confirm what is intended to happen here.
290 ret
291
292 // context size calculation, reserve first 256 bytes for use by fuc
293 init_context:
294 mov $r1 256
295
296 // calculate size of mmio context data
 // the result is taken from $r15 after the call (added to $r1 below)
297 ld b16 $r14 D[$r15 + 4]
298 ld b16 $r15 D[$r15 + 6]
299 sethi $r14 0
300 st b32 D[$r0 + #hub_mmio_list_head] $r14
301 st b32 D[$r0 + #hub_mmio_list_tail] $r15
302 call #mmctx_size
303
304 // set mmctx base addresses now so we don't have to do it later,
305 // they don't (currently) ever change
306 mov $r3 0x700
307 shl b32 $r3 6
308 shr b32 $r4 $r1 8
309 iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
310 iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
311 add b32 $r3 0x1300
312 add b32 $r1 $r15
313 shr b32 $r15 2
314 iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
315
316 // strands, base offset needs to be aligned to 256 bytes
 // note: this always moves to the next 256-byte boundary, even when $r1
 // is already aligned
317 shr b32 $r1 8
318 add b32 $r1 1
319 shl b32 $r1 8
320 mov b32 $r15 $r1
321 call #strand_ctx_init
322 add b32 $r1 $r15
323
324 // initialise each GPC in sequence by passing in the offset of its
325 // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
326 // has previously been uploaded by the host) running.
327 //
328 // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
329 // when it has completed, and return the size of its context data
330 // in GPCn_CC_SCRATCH[1]
331 //
332 ld b32 $r3 D[$r0 + #gpc_count]
333 mov $r4 0x2000
334 sethi $r4 0x500000
335 init_gpc:
336 // setup, and start GPC ucode running
337 add b32 $r14 $r4 0x804
338 mov b32 $r15 $r1
339 call #nv_wr32 // CC_SCRATCH[1] = ctx offset
340 add b32 $r14 $r4 0x800
341 mov b32 $r15 $r2
342 call #nv_wr32 // CC_SCRATCH[0] = chipset
343 add b32 $r14 $r4 0x10c
344 clear b32 $r15
345 call #nv_wr32
346 add b32 $r14 $r4 0x104
347 call #nv_wr32 // ENTRY
348 add b32 $r14 $r4 0x100
349 mov $r15 2 // CTRL_START_TRIGGER
350 call #nv_wr32 // CTRL
351
352 // wait for it to complete, and adjust context size
353 add b32 $r14 $r4 0x800
354 init_gpc_wait:
355 call #nv_rd32
356 xbit $r15 $r15 31
357 bra e #init_gpc_wait
358 add b32 $r14 $r4 0x804
359 call #nv_rd32
360 add b32 $r1 $r15
361
362 // next!
363 add b32 $r4 0x8000
364 sub b32 $r3 1
365 bra ne #init_gpc
366
367 // save context size, and tell host we're ready
368 mov $r2 0x800
369 shl b32 $r2 6
370 iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
371 add b32 $r2 0x800
372 clear b32 $r1
373 bset $r1 31
374 iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
375
376// Main program loop, very simple, sleeps until woken up by the interrupt
377// handler, pulls a command from the queue and executes its handler
378//
// Commands are compared in $r14; $r15 is passed on as the channel address
// for command 0x0001.  Commands handled:
//   0x4001  context switch (queued by ih), acked through 0x409b0c
//   0x0001  set current channel, handled by ctx_chan
//   0x0002  save current channel context, handled by ctx_xfer
// Anything else is reported through error with the command in bits 31:16.
// Commands 0x0001 and 0x0002 finish at main_done, which signals completion
// in CC_SCRATCH[0] bit 31.
//
379main:
380 // sleep until we have something to do
 // ih clears $p0 just before its iret, which ends the sleep
381 bset $flags $p0
382 sleep $p0
383 mov $r13 #cmd_queue
384 call #queue_get
 // presumably queue_get sets $p1 when the queue was empty - TODO confirm
385 bra $p1 #main
386
387 // context switch, requested by GPU?
388 cmpu b32 $r14 0x4001
389 bra ne #main_not_ctx_switch
390 trace_set(T_AUTO)
391 mov $r1 0xb00
392 shl b32 $r1 6
393 iord $r2 I[$r1 + 0x100] // CHAN_NEXT
394 iord $r1 I[$r1 + 0x000] // CHAN_CUR
395
 // bit 31 of CHAN_CUR / CHAN_NEXT selects one of three cases below:
 // both set: save current then load next; only current set: save only;
 // only next set: load only
396 xbit $r3 $r1 31
397 bra e #chsw_no_prev
398 xbit $r3 $r2 31
399 bra e #chsw_prev_no_next
400 push $r2
401 mov b32 $r2 $r1
402 trace_set(T_SAVE)
403 bclr $flags $p1
404 bset $flags $p2
405 call #ctx_xfer
406 trace_clr(T_SAVE);
407 pop $r2
408 trace_set(T_LOAD);
409 bset $flags $p1
410 call #ctx_xfer
411 trace_clr(T_LOAD);
412 bra #chsw_done
413 chsw_prev_no_next:
414 push $r2
415 mov b32 $r2 $r1
416 bclr $flags $p1
417 bclr $flags $p2
418 call #ctx_xfer
419 pop $r2
 // write the CHAN_NEXT value read earlier into CHAN_CUR (offset 0x000)
420 mov $r1 0xb00
421 shl b32 $r1 6
422 iowr I[$r1] $r2
423 bra #chsw_done
424 chsw_no_prev:
425 xbit $r3 $r2 31
426 bra e #chsw_done
427 bset $flags $p1
428 bclr $flags $p2
429 call #ctx_xfer
430
431 // ack the context switch request
432 chsw_done:
433 mov $r1 0xb0c
434 shl b32 $r1 6
435 mov $r2 1
436 iowr I[$r1 + 0x000] $r2 // 0x409b0c
437 trace_clr(T_AUTO)
438 bra #main
439
440 // request to set current channel? (*not* a context switch)
441 main_not_ctx_switch:
442 cmpu b32 $r14 0x0001
443 bra ne #main_not_ctx_chan
444 mov b32 $r2 $r15
445 call #ctx_chan
446 bra #main_done
447
448 // request to store current channel context?
449 main_not_ctx_chan:
450 cmpu b32 $r14 0x0002
451 bra ne #main_not_ctx_save
452 trace_set(T_SAVE)
453 bclr $flags $p1
454 bclr $flags $p2
455 call #ctx_xfer
456 trace_clr(T_SAVE)
457 bra #main_done
458
 // unknown command: error code = (command << 16) | bad-command code
459 main_not_ctx_save:
460 shl b32 $r15 $r14 16
461 or $r15 E_BAD_COMMAND
462 call #error
463 bra #main
464
465 main_done:
466 mov $r1 0x820
467 shl b32 $r1 6
468 clear b32 $r2
469 bset $r2 31
470 iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
471 bra #main
472
473// interrupt handler
//
// init installs this routine in $iv0 and routes all interrupts to it.  It
// queues incoming fifo commands and context switch requests on cmd_queue
// for main, forwards every other pending interrupt to the host through
// INTR_UP_SET, acks what was pending and clears $p0 so the sleep in main
// returns.
//
// $flags and every register touched here are saved on entry and restored
// on exit.  $r10 holds the INTR value read on entry and is written to
// INTR_ACK at the end, so it must stay intact until then; the INTR_UP_SET
// address is therefore built in $r9.  This relies on queue_put leaving
// $r10 and $r11 intact.
//
474ih:
475 push $r8
476 mov $r8 $flags
477 push $r8
478 push $r9
479 push $r10
480 push $r11
481 push $r13
482 push $r14
483 push $r15
484
485 // incoming fifo command?
486 iord $r10 I[$r0 + 0x200] // INTR
487 and $r11 $r10 0x00000004
488 bra e #ih_no_fifo
489 // queue incoming fifo command for later processing
490 mov $r11 0x1900
491 mov $r13 #cmd_queue
492 iord $r14 I[$r11 + 0x100] // FIFO_CMD
493 iord $r15 I[$r11 + 0x000] // FIFO_DATA
494 call #queue_put
495 add b32 $r11 0x400
496 mov $r14 1
497 iowr I[$r11 + 0x000] $r14 // FIFO_ACK
498
499 // context switch request?
500 ih_no_fifo:
501 and $r11 $r10 0x00000100
502 bra e #ih_no_ctxsw
503 // enqueue a context switch for later processing
504 mov $r13 #cmd_queue
505 mov $r14 0x4001
506 call #queue_put
507
508 // anything we didn't handle, bring it to the host's attention
 // ($r11 = pending bits other than fifo (0x004) and ctxsw (0x100))
509 ih_no_ctxsw:
510 mov $r11 0x104
511 not b32 $r11
512 and $r11 $r10 $r11
513 bra e #ih_no_other
 // build the address in $r9: $r10 must keep the INTR value for the ack
514 mov $r9 0xc1c
515 shl b32 $r9 6
516 iowr I[$r9] $r11 // INTR_UP_SET
517
518 // ack, and wake up main()
519 ih_no_other:
520 iowr I[$r0 + 0x100] $r10 // INTR_ACK
521
522 pop $r15
523 pop $r14
524 pop $r13
525 pop $r11
526 pop $r10
527 pop $r9
528 pop $r8
529 mov $flags $r8
530 pop $r8
 // cleared after $flags is restored so that main leaves its sleep
531 bclr $flags $p0
532 iret
533
1978a2f2
BS
534// Again, not real sure
535//
536// In: $r15 value to set 0x404170 to
537//
// The value is ORed with 0x10 before it is written through nv_wr32;
// ctx_4170w polls that same bit.  Overwrites $r14 and modifies $r15.
//
538ctx_4170s:
539 mov $r14 0x4170
540 sethi $r14 0x400000
541 or $r15 0x10
542 call #nv_wr32
543 ret
544
545// Waits for a ctx_4170s() call to complete
546//
// Re-reads 0x404170 through nv_rd32 until bit 4 (0x10) reads back as zero.
// There is no timeout.  Overwrites $r14 and $r15.
//
547ctx_4170w:
548 mov $r14 0x4170
549 sethi $r14 0x400000
550 call #nv_rd32
551 and $r15 0x10
552 bra ne #ctx_4170w
553 ret
554
555// Disables various things, waits a bit, and re-enables them..
556//
557// Not sure how exactly this helps, perhaps "ENABLE" is not such a
558// good description for the bits we turn off? Anyways, without this,
559// funny things happen.
560//
// Writes 0x270 to HUB_RED_SWITCH, counts $r15 down from 8 to 0 as a short
// delay, then writes 0x770.  Takes no input; overwrites $r14 and $r15.
//
561ctx_redswitch:
562 mov $r14 0x614
563 shl b32 $r14 6
564 mov $r15 0x270
565 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
566 mov $r15 8
567 ctx_redswitch_delay:
568 sub b32 $r15 1
569 bra ne #ctx_redswitch_delay
570 mov $r15 0x770
571 iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
572 ret
573
574// Not a clue what this is for, except that unless the value is 0x10, the
575// strand context is saved (and presumably restored) incorrectly..
576//
577// In: $r15 value to set to (0x00/0x10 are used)
578//
// The same value goes to three places: the hub register directly with
// iowr, then two more registers through nv_wr32.  $r15 is not reloaded
// between the two calls, so this assumes nv_wr32 leaves $r15 intact - TODO
// confirm in nve0.fuc.  Overwrites $r14.
//
579ctx_86c:
580 mov $r14 0x86c
581 shl b32 $r14 6
582 iowr I[$r14] $r15 // HUB(0x86c) = val
583 mov $r14 -0x75ec
584 sethi $r14 0x400000
585 call #nv_wr32 // ROP(0xa14) = val
586 mov $r14 -0x5794
587 sethi $r14 0x410000
588 call #nv_wr32 // GPC(0x86c) = val
589 ret
590
591// ctx_load - loads a channel's ctxctl data, and selects its vm
592//
593// In: $r2 channel address
594//
// Steps: make the channel current, read 16 bytes of its header into
// xfer_data, derive the context pointer from the first two words and store
// it in ctx_current, then read the first 256 bytes of the context into
// chan_data (which fills chan_mmio_count and chan_mmio_address).
// Overwrites $r1-$r4 and $r10, plus whatever wait_donez changes.
//
595ctx_load:
596 trace_set(T_CHAN)
597
598 // switch to channel, somewhat magic in parts..
599 mov $r10 12 // DONE_UNK12
600 call #wait_donez
601 mov $r1 0xa24
602 shl b32 $r1 6
603 iowr I[$r1 + 0x000] $r0 // 0x409a24
604 mov $r3 0xb00
605 shl b32 $r3 6
606 iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
607 mov $r1 0xa0c
608 shl b32 $r1 6
609 mov $r4 7
610 iowr I[$r1 + 0x000] $r2 // MEM_CHAN
611 iowr I[$r1 + 0x100] $r4 // MEM_CMD
 // poll MEM_CMD until its low five bits read back as zero
612 ctx_chan_wait_0:
613 iord $r4 I[$r1 + 0x100]
614 and $r4 0x1f
615 bra ne #ctx_chan_wait_0
616 iowr I[$r3 + 0x000] $r2 // CHAN_CUR
617
618 // load channel header, fetch PGRAPH context pointer
 // MEM_BASE = ((channel address with bit 31 cleared) << 4) + 2
619 mov $xtargets $r0
620 bclr $r2 31
621 shl b32 $r2 4
622 add b32 $r2 2
623
624 trace_set(T_LCHAN)
625 mov $r1 0xa04
626 shl b32 $r1 6
627 iowr I[$r1 + 0x000] $r2 // MEM_BASE
628 mov $r1 0xa20
629 shl b32 $r1 6
630 mov $r2 0x0002
631 sethi $r2 0x80000000
632 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
633 mov $r1 0x10 // chan + 0x0210
634 mov $r2 #xfer_data
635 sethi $r2 0x00020000 // 16 bytes
636 xdld $r1 $r2
637 xdwait
638 trace_clr(T_LCHAN)
639
640 // update current context
 // ctx_current = (second word << 24) | (first word >> 8)
641 ld b32 $r1 D[$r0 + #xfer_data + 4]
642 shl b32 $r1 24
643 ld b32 $r2 D[$r0 + #xfer_data + 0]
644 shr b32 $r2 8
645 or $r1 $r2
646 st b32 D[$r0 + #ctx_current] $r1
647
648 // set transfer base to start of context, and fetch context header
649 trace_set(T_LCTXH)
650 mov $r2 0xa04
651 shl b32 $r2 6
652 iowr I[$r2 + 0x000] $r1 // MEM_BASE
653 mov $r2 1
654 mov $r1 0xa20
655 shl b32 $r1 6
656 iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
657 mov $r1 #chan_data
658 sethi $r1 0x00060000 // 256 bytes
659 xdld $r0 $r1
660 xdwait
661 trace_clr(T_LCTXH)
662
663 trace_clr(T_CHAN)
664 ret
665
666// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
667// the active channel for ctxctl, but not actually transfer
668// any context data. intended for use only during initial
669// context construction.
670//
671// In: $r2 channel address
672//
// Calls ctx_load, waits on DONE_UNK12, then writes 5 to MEM_CMD and polls
// it until it reads back as zero (no timeout).  Called from main for
// command 0x0001.
//
673ctx_chan:
1978a2f2
BS
674 call #ctx_load
675 mov $r10 12 // DONE_UNK12
676 call #wait_donez
677 mov $r1 0xa10
678 shl b32 $r1 6
679 mov $r2 5
680 iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
681 ctx_chan_wait:
682 iord $r2 I[$r1 + 0x000]
 // or-ing the register with itself only updates the flags for the branch
683 or $r2 $r2
684 bra ne #ctx_chan_wait
1978a2f2
BS
685 ret
686
687// Execute per-context state overrides list
688//
689// Only executed on the first load of a channel. Might want to look into
690// removing this and having the host directly modify the channel's context
691// to change this state... The nouveau DRM already builds this list as
692// it's definitely needed for NVIDIA's, so we may as well use it for now
693//
694// Input: $r1 mmio list length
695//
// The list is read from chan_mmio_address in 256-byte chunks into
// xfer_data.  Each entry is 8 bytes: the first word goes to nv_wr32 in $r14
// and the second in $r15.  $r1 counts entries and must be non-zero on
// entry, because the loop decrements before it tests; ctx_xfer checks this
// before calling.  Afterwards MEM_BASE is pointed back at ctx_current,
// chan_mmio_count is zeroed and chan_data is written back with xdst.
//
696ctx_mmio_exec:
697 // set transfer base to be the mmio list
698 ld b32 $r3 D[$r0 + #chan_mmio_address]
699 mov $r2 0xa04
700 shl b32 $r2 6
701 iowr I[$r2 + 0x000] $r3 // MEM_BASE
702
 // $r3 = byte offset into the list, $r4 = offset within the current chunk
703 clear b32 $r3
704 ctx_mmio_loop:
705 // fetch next 256 bytes of mmio list if necessary
706 and $r4 $r3 0xff
707 bra ne #ctx_mmio_pull
708 mov $r5 #xfer_data
709 sethi $r5 0x00060000 // 256 bytes
710 xdld $r3 $r5
711 xdwait
712
713 // execute a single list entry
714 ctx_mmio_pull:
715 ld b32 $r14 D[$r4 + #xfer_data + 0x00]
716 ld b32 $r15 D[$r4 + #xfer_data + 0x04]
717 call #nv_wr32
718
719 // next!
720 add b32 $r3 8
721 sub b32 $r1 1
722 bra ne #ctx_mmio_loop
723
724 // set transfer base back to the current context
 // ($r2 still holds the MEM_BASE address set up at the top)
725 ctx_mmio_done:
726 ld b32 $r3 D[$r0 + #ctx_current]
727 iowr I[$r2 + 0x000] $r3 // MEM_BASE
728
729 // disable the mmio list now, we don't need/want to execute it again
730 st b32 D[$r0 + #chan_mmio_count] $r0
731 mov $r1 #chan_data
732 sethi $r1 0x00060000 // 256 bytes
733 xdst $r0 $r1
734 xdwait
735 ret
736
737// Transfer HUB context data between GPU and storage area
738//
739// In: $r2 channel address
740// $p1 clear on save, set on load
741// $p2 set if opposite direction done/will be done, so:
742// on save it means: "a load will follow this save"
743// on load it means: "a save preceded this load"
744//
// Flow, as selected by $p1/$p2:
//   save:            ctx_86c(0x10), transfer, MEM_CMD 5, then the post
//                    sequence unless $p2 is set
//   load, $p2 clear: ctx_86c(0x10), pre-load sequence, transfer, post
//   load, $p2 set:   pre-load sequence, transfer, post
// The pre-load sequence calls ctx_load, the only place $r2 is consumed.
// The transfer itself kicks off the GPCs, the strands and the hub mmio list
// and then waits for the GPCs and the strands to finish.
//
745ctx_xfer:
eca15296
BS
746 // according to mwk, some kind of wait for idle
747 mov $r15 0xc00
748 shl b32 $r15 6
749 mov $r14 4
750 iowr I[$r15 + 0x200] $r14
751 ctx_xfer_idle:
752 iord $r14 I[$r15 + 0x000]
753 and $r14 0x2000
754 bra ne #ctx_xfer_idle
755
1978a2f2
BS
756 bra not $p1 #ctx_xfer_pre
757 bra $p2 #ctx_xfer_pre_load
758 ctx_xfer_pre:
759 mov $r15 0x10
760 call #ctx_86c
1978a2f2
BS
761 bra not $p1 #ctx_xfer_exec
762
763 ctx_xfer_pre_load:
764 mov $r15 2
765 call #ctx_4170s
766 call #ctx_4170w
767 call #ctx_redswitch
768 clear b32 $r15
769 call #ctx_4170s
770 call #ctx_load
771
772 // fetch context pointer, and initiate xfer on all GPCs
773 ctx_xfer_exec:
774 ld b32 $r1 D[$r0 + #ctx_current]
775 mov $r2 0x414
776 shl b32 $r2 6
777 iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
778 mov $r14 -0x5b00
779 sethi $r14 0x410000
780 mov b32 $r15 $r1
781 call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
 // command value = $p1 | ($p2 << 1)
782 add b32 $r14 4
783 xbit $r15 $flags $p1
784 xbit $r2 $flags $p2
785 shl b32 $r2 1
786 or $r15 $r2
787 call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
788
789 // strands
790 mov $r1 0x4afc
791 sethi $r1 0x20000
792 mov $r2 0xc
793 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
794 call #strand_wait
795 mov $r2 0x47fc
796 sethi $r2 0x20000
797 iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
798 xbit $r2 $flags $p1
799 add b32 $r2 3
800 iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
801
802 // mmio context
803 xbit $r10 $flags $p1 // direction
804 or $r10 6 // first, last
805 mov $r11 0 // base = 0
806 ld b32 $r12 D[$r0 + #hub_mmio_list_head]
807 ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
808 mov $r14 0 // not multi
809 call #mmctx_xfer
810
811 // wait for GPCs to all complete
812 mov $r10 8 // DONE_BAR
813 call #wait_doneo
814
815 // wait for strand xfer to complete
816 call #strand_wait
817
818 // post-op
 // save only: write 5 to MEM_CMD and poll until it reads back as zero
819 bra $p1 #ctx_xfer_post
820 mov $r10 12 // DONE_UNK12
821 call #wait_donez
822 mov $r1 0xa10
823 shl b32 $r1 6
824 mov $r2 5
825 iowr I[$r1] $r2 // MEM_CMD
826 ctx_xfer_post_save_wait:
827 iord $r2 I[$r1]
828 or $r2 $r2
829 bra ne #ctx_xfer_post_save_wait
830
 // a save that will be followed by a load skips the post sequence
831 bra $p2 #ctx_xfer_done
832 ctx_xfer_post:
833 mov $r15 2
834 call #ctx_4170s
835 clear b32 $r15
836 call #ctx_86c
837 call #strand_post
838 call #ctx_4170w
839 clear b32 $r15
840 call #ctx_4170s
841
 // after a load, run the channel mmio list when its count is non-zero
842 bra not $p1 #ctx_xfer_no_post_mmio
843 ld b32 $r1 D[$r0 + #chan_mmio_count]
844 or $r1 $r1
845 bra e #ctx_xfer_no_post_mmio
846 call #ctx_mmio_exec
847
848 ctx_xfer_no_post_mmio:
1978a2f2
BS
849
850 ctx_xfer_done:
851 ret
852
853.align 256
This page took 0.106345 seconds and 5 git commands to generate.