X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=ld%2Femultempl%2Fspu_ovl.S;h=08b78a1d39e029bfc584974afcb02b53428bb63b;hb=a435742a7fb32f6320ce0e6074e2500e28378104;hp=3f9c83bbca4e18baae3595891fbae1f872a1a026;hpb=47f6dab9a3fb353b0faca52fcc07d2f57c4d906c;p=deliverable%2Fbinutils-gdb.git diff --git a/ld/emultempl/spu_ovl.S b/ld/emultempl/spu_ovl.S index 3f9c83bbca..08b78a1d39 100644 --- a/ld/emultempl/spu_ovl.S +++ b/ld/emultempl/spu_ovl.S @@ -1,6 +1,6 @@ /* Overlay manager for SPU. - Copyright 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 2006-2020 Free Software Foundation, Inc. This file is part of the GNU Binutils. @@ -46,12 +46,13 @@ #define cgbits reserved2 #define off3 reserved2 #define off4 reserved2 +#define addr4 reserved2 #define off5 reserved2 #define tagstat reserved2 #define reserved3 $77 -#define buf1 reserved3 -#define buf2 reserved3 +#define size1 reserved3 +#define size2 reserved3 #define rv3 reserved3 #define ealo reserved3 #define cmd reserved3 @@ -66,11 +67,13 @@ #define rv5 reserved4 #define cgshuf reserved4 #define newovl reserved4 +#define irqtmp1 reserved4 +#define irqtmp2 reserved4 #define reserved5 $79 #define target reserved5 -#define save1 $72 +#define save1 $74 #define rv4 save1 #define rv7 save1 #define tagid save1 @@ -86,7 +89,7 @@ #define oldovl save2 #define newvma save2 -#define save3 $74 +#define save3 $72 #define rv1 save3 #define ea64 save3 #define buf3 save3 @@ -94,9 +97,11 @@ #define newmap save3 #define oldmask save3 +#define save4 $71 +#define irq_stat save4 .text - .align 4 + .align 4 .type __rv_pattern, @object .size __rv_pattern, 16 __rv_pattern: @@ -121,7 +126,7 @@ __ovly_current: * Load the previous overlay and jump to the caller return address. * Updates __ovly_current. */ - .align 4 + .align 4 .global __ovly_return .type __ovly_return, @function __ovly_return: @@ -139,24 +144,29 @@ __ovly_return: #nop; lnop #nop lqx vma, tab1, off1 # 1,6 8 +#ifdef OVLY_IRQ_SAVE + nop + stqd save4, -64($sp) # 1,6 9 +#else #nop; lnop +#endif #nop; lnop #nop; lnop #nop; lnop #nop; lnop #nop - rotqbyi buf1, vma, 12 # 1,4 14 + rotqbyi size1, vma, 4 # 1,4 14 #nop stqd save3, -48($sp) # 1,6 15 #nop stqd save2, -32($sp) # 1,6 16 #nop stqd save1, -16($sp) # 1,6 17 - andi present1, buf1, 1 # 0,2 18 - stqd ovl, (__ovly_current - __ovly_return)($lr) # 1,6 18 + andi present1, size1, 1 # 0,2 18 + stqr ovl, __ovly_current # 1,6 18 #nop; lnop #nop - brz present1, __ovly_load_event # 1,4 20 + brz present1, do_load # 1,4 20 ovly_ret9: #nop bi target # 1,4 21 @@ -167,7 +177,9 @@ ovly_ret9: * On entry $75 points to a word consisting of the overlay index in * the top 14 bits, and the target address in the bottom 18 bits. * - * Sets up $lr to return via __ovly_return. + * Sets up $lr to return via __ovly_return. If $lr is already set + * to return via __ovly_return, don't change it. In that case we + * have a tail call from one overlay function to another. * Updates __ovly_current. */ .align 3 @@ -197,11 +209,11 @@ __ovly_load: #lnop #nop; lnop #nop - lqd cur, (__ovly_current - __ovly_return)(rv1) # 1,6 2 + lqr cur, __ovly_current # 1,6 2 shli off2, ovl, 4 # 0,4 3 - stqd ovl, (__ovly_current - __ovly_return)(rv1) # 1,6 3 + stqr ovl, __ovly_current # 1,6 3 ceq rv2, $lr, rv1 # 0,2 4 - lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 + lqr rv3, __rv_pattern # 1,6 4 #nop; lnop #nop; lnop #nop @@ -214,11 +226,11 @@ __ovly_load: ila rv1, __ovly_return # 0,2 1 stqd save2, -32($sp) # 1,6 1 shli off2, ovl, 4 # 0,4 2 - lqa cur, __ovly_current # 1,6 2 + lqr cur, __ovly_current # 1,6 2 nop - stqa ovl, __ovly_current # 1,6 3 + stqr ovl, __ovly_current # 1,6 3 ceq rv2, $lr, rv1 # 0,2 4 - lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 + lqr rv3, __rv_pattern # 1,6 4 #nop hbr ovly_load9, target # 1,15 5 #nop @@ -237,18 +249,22 @@ __ovly_load: #nop rotqmbyi rv6, $lr, -8 # 1,4 12 #nop - rotqbyi buf2, vma, 12 # 1,4 13 + rotqbyi size2, vma, 4 # 1,4 13 #nop lqd save3, -48($sp) # 1,6 14 #nop; lnop or rv7, rv4, rv6 # 0,2 16 lqd save2, -32($sp) # 1,6 16 - andi present2, buf2, 1 # 0,2 17 + andi present2, size2, 1 # 0,2 17 +#ifdef OVLY_IRQ_SAVE + stqd save4, -64($sp) # 1,6 17 +#else lnop # 1,0 17 +#endif selb $lr, rv7, $lr, rv5 # 0,2 18 lqd save1, -16($sp) # 1,6 18 #nop - brz present2, __ovly_load_event # 1,4 19 + brz present2, do_load # 1,4 19 ovly_load9: #nop bi target # 1,4 20 @@ -266,14 +282,25 @@ ovly_load9: .global __ovly_load_event .type __ovly_load_event, @function __ovly_load_event: +do_load: +#ifdef OVLY_IRQ_SAVE + ila irqtmp1, do_load10 # 0,2 -5 + rotqbyi sz, vma, 8 # 1,4 -5 #nop - rotqbyi sz, vma, 8 # 1,4 0 + rdch irq_stat, $SPU_RdMachStat # 1,6 -4 +#nop + bid irqtmp1 # 1,4 -3 +do_load10: + nop +#else #nop + rotqbyi sz, vma, 8 # 1,4 0 +#endif rotqbyi osize, vma, 4 # 1,4 1 #nop lqa ea64, _EAR_ # 1,6 2 #nop - lqd cgshuf, (__cg_pattern - __ovly_return)($lr) # 1,6 3 + lqr cgshuf, __cg_pattern # 1,6 3 /* We could predict the branch at the end of this loop by adding a few instructions, and there are plenty of free cycles to do so without @@ -316,13 +343,13 @@ __ovly_xfer_loop: brnz osize, __ovly_xfer_loop # 1,4 24 /* Now update our data structions while waiting for DMA to complete. - Low bit of .buf needs to be cleared on the _ovly_table entry + Low bit of .size needs to be cleared on the _ovly_table entry corresponding to the evicted overlay, and set on the entry for the newly loaded overlay. Note that no overlay may in fact be evicted - as _ovly_buf_table[] starts with all zeros. Don't zap .buf entry + as _ovly_buf_table[] starts with all zeros. Don't zap .size entry for zero index! Also of course update the _ovly_buf_table entry. */ #nop - lqd newovl, (__ovly_current - __ovly_return)($lr) # 1,6 25 + lqr newovl, __ovly_current # 1,6 25 #nop; lnop #nop; lnop #nop; lnop @@ -333,7 +360,7 @@ __ovly_xfer_loop: ila tab3, _ovly_table - 16 # 0,2 32 #lnop #nop - fsmbi pbyte, 1 # 1,4 33 + fsmbi pbyte, 0x100 # 1,4 33 #nop; lnop #nop lqx vma, tab3, off3 # 1,6 35 @@ -351,7 +378,7 @@ __ovly_xfer_loop: #nop; lnop shli off4, buf3, 2 # 1,4 45 #lnop - ila tab4, _ovly_buf_table # 0,2 46 + ila tab4, _ovly_buf_table - 4 # 0,2 46 #lnop #nop; lnop #nop; lnop @@ -359,13 +386,14 @@ __ovly_xfer_loop: lqx map, tab4, off4 # 1,6 49 #nop cwx genwi, tab4, off4 # 1,4 50 -#nop; lnop + a addr4, tab4, off4 # 0,2 51 +#lnop #nop; lnop #nop; lnop #nop; lnop #nop - rotqby oldovl, map, off4 # 1,4 55 - nop + rotqby oldovl, map, addr4 # 1,4 55 +#nop shufb newmap, newovl, map, genwi # 0,4 56 #if MFC_TAG_ID < 16 ila newmask, 1 << MFC_TAG_ID # 0,2 57 @@ -375,7 +403,7 @@ __ovly_xfer_loop: #lnop #nop; lnop #nop; lnop - stqx newmap, tab4, off4 # 1,6 60 + stqd newmap, 0(addr4) # 1,6 60 /* Save app's tagmask, wait for DMA complete, restore mask. */ ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61 @@ -407,14 +435,30 @@ __ovly_xfer_loop: #nop; lnop andc pbit, pbit, zovl # 0,2 74 lqd save2, -32($sp) # 1,6 74 +#ifdef OVLY_IRQ_SAVE + ila irqtmp2, do_load90 # 0,2 75 +#lnop + andi irq_stat, irq_stat, 1 # 0,2 76 +#lnop +#else #nop; lnop #nop; lnop +#endif andc oldvma, oldvma, pbit # 0,2 77 lqd save1, -16($sp) # 1,6 77 -#nop; lnop - nop + nop # 0,0 78 +#lnop +#nop stqx oldvma, tab5, off5 # 1,6 79 +#nop +#ifdef OVLY_IRQ_SAVE + binze irq_stat, irqtmp2 # 1,4 80 +do_load90: +#nop + lqd save4, -64($sp) # 1,6 84 +#else #nop; lnop +#endif .global _ovly_debug_event .type _ovly_debug_event, @function @@ -422,6 +466,6 @@ _ovly_debug_event: nop /* Branch to target address. */ do_load99: - bi target # 1,4 81 + bi target # 1,4 81/85 .size __ovly_load, . - __ovly_load