-/* 64b add to compute next ea64. */
- rotqmbyi off64, sz, -4
- cg cgbits, ea64, off64
- shufb add64, cgbits, cgbits, cgshuf
- addx add64, ea64, off64
- ori ea64, add64, 0
-
-/* Setup DMA parameters, then issue DMA request. */
- rotqbyi ealo, add64, 4
- ila maxsize, MFC_MAX_DMA_SIZE
- cgt cmp, osize, maxsize
- selb sz, osize, maxsize, cmp
- ila tagid, MFC_TAG_ID
- wrch $MFC_LSA, vma
- wrch $MFC_EAH, ea64
- wrch $MFC_EAL, ealo
- wrch $MFC_Size, sz
- wrch $MFC_TagId, tagid
- ila cmd, MFC_GET_CMD
- wrch $MFC_Cmd, cmd
-
-/* Increment vma, decrement size, branch back as needed. */
- a vma, vma, sz
- sf osize, sz, osize
- brnz osize, __ovly_xfer_loop
-
-/* Save app's tagmask, wait for DMA complete, restore mask. */
- rdch oldmask, $MFC_RdTagMask
+#nop
+ rotqmbyi off64, sz, -4 # 1,4 4
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ cg cgbits, ea64, off64 # 0,2 8
+#lnop
+#nop; lnop
+#nop
+ shufb add64, cgbits, cgbits, cgshuf # 1,4 10
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ addx add64, ea64, off64 # 0,2 14
+#lnop
+ ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
+ lnop
+ ori ea64, add64, 0 # 0,2 16
+ rotqbyi ealo, add64, 4 # 1,4 16
+ cgt cmp, osize, maxsize # 0,2 17
+ wrch $MFC_LSA, vma # 1,6 17
+#nop; lnop
+ selb sz, osize, maxsize, cmp # 0,2 19
+ wrch $MFC_EAH, ea64 # 1,6 19
+ ila tagid, MFC_TAG_ID # 0,2 20
+ wrch $MFC_EAL, ealo # 1,6 20
+ ila cmd, MFC_GET_CMD # 0,2 21
+ wrch $MFC_Size, sz # 1,6 21
+ sf osize, sz, osize # 0,2 22
+ wrch $MFC_TagId, tagid # 1,6 22
+ a vma, vma, sz # 0,2 23
+ wrch $MFC_Cmd, cmd # 1,6 23
+#nop
+ brnz osize, __ovly_xfer_loop # 1,4 24
+
+/* Now update our data structures while waiting for DMA to complete.
+ Low bit of .size needs to be cleared on the _ovly_table entry
+ corresponding to the evicted overlay, and set on the entry for the
+ newly loaded overlay. Note that no overlay may in fact be evicted
+ as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
+ for zero index! Also of course update the _ovly_buf_table entry. */
+#nop
+ lqr newovl, __ovly_current # 1,6 25
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ shli off3, newovl, 4 # 0,4 31
+#lnop
+ ila tab3, _ovly_table - 16 # 0,2 32
+#lnop
+#nop
+ fsmbi pbyte, 0x100 # 1,4 33
+#nop; lnop
+#nop
+ lqx vma, tab3, off3 # 1,6 35
+#nop; lnop
+ andi pbit, pbyte, 1 # 0,2 37
+ lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+ or newvma, vma, pbit # 0,2 41
+ rotqbyi buf3, vma, 12 # 1,4 41
+#nop; lnop
+#nop
+ stqx newvma, tab3, off3 # 1,6 43
+#nop; lnop
+ shli off4, buf3, 2 # 1,4 45
+#lnop
+ ila tab4, _ovly_buf_table - 4 # 0,2 46
+#lnop
+#nop; lnop
+#nop; lnop
+#nop
+ lqx map, tab4, off4 # 1,6 49
+#nop
+ cwx genwi, tab4, off4 # 1,4 50
+ a addr4, tab4, off4 # 0,2 51
+#lnop
+#nop; lnop
+#nop; lnop
+#nop; lnop
+#nop
+ rotqby oldovl, map, addr4 # 1,4 55
+#nop
+ shufb newmap, newovl, map, genwi # 0,4 56