binutils: support for the SPARC M8 processor
[deliverable/binutils-gdb.git] / sim / m32r / mloopx.in
index 6b084f4bfb1dcc7fc0be309892b5b6f9bcf3665e..ee818fbde27522f32d21929e5674f17dce921ebb 100644 (file)
@@ -1,24 +1,37 @@
 # Simulator main loop for m32rx. -*- C -*-
-# Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
+#
+# Copyright 1996-2017 Free Software Foundation, Inc.
 #
 # This file is part of the GNU Simulators.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Syntax:
-# /bin/sh mainloop.in init|support|{full,fast}-{extract,exec}-{scache,noscache}
+# /bin/sh mainloop.in command
+#
+# Command is one of:
+#
+# init
+# support
+# extract-{simple,scache,pbb}
+# {full,fast}-exec-{simple,scache,pbb}
+#
+# A target need only provide a "full" version of one of simple,scache,pbb.
+# If the target wants it can also provide a fast version of same, or if
+# the slow (full featured) version is `simple', then the fast version can be
+# one of scache/pbb.
+# A target can't provide more than this.
 
 # ??? After a few more ports are done, revisit.
 # Will eventually need to machine generate a lot of this.
@@ -29,170 +42,487 @@ xsupport)
 
 cat <<EOF
 
+/* Emit insns to write back the results of insns executed in parallel.
+   SC points to a sufficient number of scache entries for the writeback
+   handlers.
+   SC1/ID1 is the first insn (left slot, lower address).
+   SC2/ID2 is the second insn (right slot, higher address).  */
+
+static INLINE void
+emit_par_finish (SIM_CPU *current_cpu, PCADDR pc, SCACHE *sc,
+                SCACHE *sc1, const IDESC *id1, SCACHE *sc2, const IDESC *id2)
+{
+  ARGBUF *abuf;
+
+  abuf = &sc->argbuf;
+  id1 = id1->par_idesc;
+  abuf->fields.write.abuf = &sc1->argbuf;
+  @cpu@_fill_argbuf (current_cpu, abuf, id1, pc, 0);
+  /* no need to set trace_p,profile_p */
+#if 0 /* not currently needed for id2 since results written directly */
+  abuf = &sc[1].argbuf;
+  id2 = id2->par_idesc;
+  abuf->fields.write.abuf = &sc2->argbuf;
+  @cpu@_fill_argbuf (current_cpu, abuf, id2, pc + 2, 0);
+  /* no need to set trace_p,profile_p */
+#endif
+}
+
+static INLINE const IDESC *
+emit_16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
+        SCACHE *sc, int fast_p, int parallel_p)
+{
+  ARGBUF *abuf = &sc->argbuf;
+  const IDESC *id = @cpu@_decode (current_cpu, pc, insn, insn, abuf);
+
+  if (parallel_p)
+    id = id->par_idesc;
+  @cpu@_fill_argbuf (current_cpu, abuf, id, pc, fast_p);
+  return id;
+}
+
+static INLINE const IDESC *
+emit_full16 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn, SCACHE *sc,
+            int trace_p, int profile_p)
+{
+  const IDESC *id;
+
+  @cpu@_emit_before (current_cpu, sc, pc, 1);
+  id = emit_16 (current_cpu, pc, insn, sc + 1, 0, 0);
+  @cpu@_emit_after (current_cpu, sc + 2, pc);
+  sc[1].argbuf.trace_p = trace_p;
+  sc[1].argbuf.profile_p = profile_p;
+  return id;
+}
+
+static INLINE const IDESC *
+emit_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
+              SCACHE *sc, int fast_p)
+{
+  const IDESC *id,*id2;
+
+  /* Emit both insns, then emit a finisher-upper.
+     We speed things up by handling the second insn serially
+     [not parallelly].  Then the writeback only has to deal
+     with the first insn.  */
+  /* ??? Revisit to handle exceptions right.  */
+
+  /* FIXME: No need to handle this parallely if second is nop.  */
+  id = emit_16 (current_cpu, pc, insn >> 16, sc, fast_p, 1);
+
+  /* Note that this can never be a cti.  No cti's go in the S pipeline.  */
+  id2 = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 1, fast_p, 0);
+
+  /* Set sc/snc insns notion of where to skip to.  */
+  if (IDESC_SKIP_P (id))
+    SEM_SKIP_COMPILE (current_cpu, sc, 1);
+
+  /* Emit code to finish executing the semantics
+     (write back the results).  */
+  emit_par_finish (current_cpu, pc, sc + 2, sc, id, sc + 1, id2);
+
+  return id;
+}
+
+static INLINE const IDESC *
+emit_full_parallel (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
+                   SCACHE *sc, int trace_p, int profile_p)
+{
+  const IDESC *id,*id2;
+
+  /* Emit both insns, then emit a finisher-upper.
+     We speed things up by handling the second insn serially
+     [not parallelly].  Then the writeback only has to deal
+     with the first insn.  */
+  /* ??? Revisit to handle exceptions right.  */
+
+  @cpu@_emit_before (current_cpu, sc, pc, 1);
+
+  /* FIXME: No need to handle this parallelly if second is nop.  */
+  id = emit_16 (current_cpu, pc, insn >> 16, sc + 1, 0, 1);
+  sc[1].argbuf.trace_p = trace_p;
+  sc[1].argbuf.profile_p = profile_p;
+
+  @cpu@_emit_before (current_cpu, sc + 2, pc, 0);
+
+  /* Note that this can never be a cti.  No cti's go in the S pipeline.  */
+  id2 = emit_16 (current_cpu, pc + 2, insn & 0x7fff, sc + 3, 0, 0);
+  sc[3].argbuf.trace_p = trace_p;
+  sc[3].argbuf.profile_p = profile_p;
+
+  /* Set sc/snc insns notion of where to skip to.  */
+  if (IDESC_SKIP_P (id))
+    SEM_SKIP_COMPILE (current_cpu, sc, 4);
+
+  /* Emit code to finish executing the semantics
+     (write back the results).  */
+  emit_par_finish (current_cpu, pc, sc + 4, sc + 1, id, sc + 3, id2);
+
+  @cpu@_emit_after (current_cpu, sc + 5, pc);
+
+  return id;
+}
+
+static INLINE const IDESC *
+emit_32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn,
+        SCACHE *sc, int fast_p)
+{
+  ARGBUF *abuf = &sc->argbuf;
+  const IDESC *id = @cpu@_decode (current_cpu, pc,
+                                 (USI) insn >> 16, insn, abuf);
+
+  @cpu@_fill_argbuf (current_cpu, abuf, id, pc, fast_p);
+  return id;
+}
+
+static INLINE const IDESC *
+emit_full32 (SIM_CPU *current_cpu, PCADDR pc, CGEN_INSN_INT insn, SCACHE *sc,
+            int trace_p, int profile_p)
+{
+  const IDESC *id;
+
+  @cpu@_emit_before (current_cpu, sc, pc, 1);
+  id = emit_32 (current_cpu, pc, insn, sc + 1, 0);
+  @cpu@_emit_after (current_cpu, sc + 2, pc);
+  sc[1].argbuf.trace_p = trace_p;
+  sc[1].argbuf.profile_p = profile_p;
+  return id;
+}
+
 EOF
 
 ;;
 
 xinit)
 
-cat <<EOF
-  const IDESC *d1,*d2;
-  ARGBUF abufs[MAX_PARALLEL_INSNS];
-  PAREXEC pbufs[MAX_PARALLEL_INSNS];
-EOF
+# Nothing needed.
 
 ;;
 
-xfull-extract-* | xfast-extract-*)
+xextract-pbb)
+
+# Inputs:  current_cpu, pc, sc, max_insns, FAST_P
+# Outputs: sc, pc
+# sc must be left pointing past the last created entry.
+# pc must be left pointing past the last created entry.
+# If the pbb is terminated by a cti insn, SET_CTI_VPC(sc) must be called
+# to record the vpc of the cti insn.
+# SET_INSN_COUNT(n) must be called to record number of real insns.
 
 cat <<EOF
 {
-  PCADDR pc = CPU (h_pc);
-
-  /* ??? This code isn't very fast.  Let's get it working first.  */
+  const IDESC *idesc;
+  int icount = 0;
 
   if ((pc & 3) != 0)
     {
-      USI insn = GETIMEMUHI (current_cpu, pc);
-      insn &= 0x7fff;
-      d1 = m32rx_decode (current_cpu, pc, insn);
-      abufs[0].insn = insn;
-      abufs[0].idesc = d1;
-      abufs[0].addr = pc; /* FIXME: wip */
-      icount = 1;
+      /* This occurs when single stepping and when compiling the not-taken
+        part of conditional branches.  */
+      UHI insn = GETIMEMUHI (current_cpu, pc);
+      int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
+      int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
+      SCACHE *cti_sc; /* ??? tmp hack */
+
+      /* A parallel insn isn't allowed here, but we don't mind nops.
+        ??? We need to wait until the insn is executed before signalling
+        the error, for situations where such signalling is wanted.  */
+#if 0
+      if ((insn & 0x8000) != 0
+         && (insn & 0x7fff) != 0x7000) /* parallel nops are ok */
+       sim_engine_invalid_insn (current_cpu, pc, 0);
+#endif
+
+      /* Only emit before/after handlers if necessary.  */
+      if (FAST_P || (! trace_p && ! profile_p))
+       {
+         idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, FAST_P, 0);
+         cti_sc = sc;
+         ++sc;
+         --max_insns;
+       }
+      else
+       {
+         idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
+                              trace_p, profile_p);
+         cti_sc = sc + 1;
+         sc += 3;
+         max_insns -= 3;
+       }
+      ++icount;
+      pc += 2;
+      if (IDESC_CTI_P (idesc))
+       {
+         SET_CTI_VPC (cti_sc);
+         goto Finish;
+       }
     }
-  else
+
+  /* There are two copies of the compiler: full(!fast) and fast.
+     The "full" case emits before/after handlers for each insn.
+     Having two copies of this code is a tradeoff, having one copy
+     seemed a bit more difficult to read (due to constantly testing
+     FAST_P).  ??? On the other hand, with address ranges we'll want to
+     omit before/after handlers for unwanted insns.  Having separate loops
+     for FAST/!FAST avoids constantly doing the test in the loop, but
+     typically FAST_P is a constant and such tests will get optimized out.  */
+
+  if (FAST_P)
     {
-      USI insn = GETIMEMUSI (current_cpu, pc);
-      if ((SI) insn < 0)
+      while (max_insns > 0)
        {
-         d1 = m32rx_decode (current_cpu, pc, insn >> 16);
-         abufs[0].insn = insn;
-         abufs[0].idesc = d1;
-         abufs[0].addr = pc; /* FIXME: wip */
-         icount = 1;
+         USI insn = GETIMEMUSI (current_cpu, pc);
+         if ((SI) insn < 0)
+           {
+             /* 32 bit insn */
+             idesc = emit_32 (current_cpu, pc, insn, sc, 1);
+             ++sc;
+             --max_insns;
+             ++icount;
+             pc += 4;
+             if (IDESC_CTI_P (idesc))
+               {
+                 SET_CTI_VPC (sc - 1);
+                 break;
+               }
+           }
+         else
+           {
+             if ((insn & 0x8000) != 0) /* parallel? */
+               {
+                 int up_count;
+
+                 if (((insn >> 16) & 0xfff0) == 0x10f0)
+                   {
+                     /* FIXME: No need to handle this sequentially if system
+                        calls will be able to execute after second insn in
+                        parallel. ( trap #num || insn ) */
+                     /* insn */
+                     idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
+                                      sc, 1, 0);
+                     /* trap */
+                     emit_16 (current_cpu, pc, insn >> 16, sc + 1, 1, 0);
+                     up_count = 2;
+                   }
+                 else
+                   {
+                     /* Yep.  Here's the "interesting" [sic] part.  */
+                     idesc = emit_parallel (current_cpu, pc, insn, sc, 1);
+                     up_count = 3;
+                   }
+                 sc += up_count;
+                 max_insns -= up_count;
+                 icount += 2;
+                 pc += 4;
+                 if (IDESC_CTI_P (idesc))
+                   {
+                     SET_CTI_VPC (sc - up_count);
+                     break;
+                   }
+               }
+             else /* 2 serial 16 bit insns */
+               {
+                 idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 1, 0);
+                 ++sc;
+                 --max_insns;
+                 ++icount;
+                 pc += 2;
+                 if (IDESC_CTI_P (idesc))
+                   {
+                     SET_CTI_VPC (sc - 1);
+                     break;
+                   }
+                 /* While we're guaranteed that there's room to extract the
+                    insn, when single stepping we can't; the pbb must stop
+                    after the first insn.  */
+                 if (max_insns == 0)
+                   break;
+                 idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 1, 0);
+                 ++sc;
+                 --max_insns;
+                 ++icount;
+                 pc += 2;
+                 if (IDESC_CTI_P (idesc))
+                   {
+                     SET_CTI_VPC (sc - 1);
+                     break;
+                   }
+               }
+           }
        }
-      else
+    }
+  else /* ! FAST_P */
+    {
+      while (max_insns > 0)
        {
-         if (insn & 0x8000)
+         USI insn = GETIMEMUSI (current_cpu, pc);
+         int trace_p = PC_IN_TRACE_RANGE_P (current_cpu, pc);
+         int profile_p = PC_IN_PROFILE_RANGE_P (current_cpu, pc);
+         SCACHE *cti_sc; /* ??? tmp hack */
+         if ((SI) insn < 0)
            {
-             d1 = m32rx_decode (current_cpu, pc, insn >> 16);
-             abufs[0].insn = insn >> 16;
-             abufs[0].idesc = d1;
-             abufs[0].addr = pc; /* FIXME: wip */
-             d2 = m32rx_decode (current_cpu, pc + 2, insn & 0x7fff);
-             abufs[1].insn = insn & 0x7fff;
-             abufs[1].idesc = d2;
-             abufs[1].addr = pc + 2; /* FIXME: wip */
-             icount = 2;
+             /* 32 bit insn
+                Only emit before/after handlers if necessary.  */
+             if (trace_p || profile_p)
+               {
+                 idesc = emit_full32 (current_cpu, pc, insn, sc,
+                                      trace_p, profile_p);
+                 cti_sc = sc + 1;
+                 sc += 3;
+                 max_insns -= 3;
+               }
+             else
+               {
+                 idesc = emit_32 (current_cpu, pc, insn, sc, 0);
+                 cti_sc = sc;
+                 ++sc;
+                 --max_insns;
+               }
+             ++icount;
+             pc += 4;
+             if (IDESC_CTI_P (idesc))
+               {
+                 SET_CTI_VPC (cti_sc);
+                 break;
+               }
            }
          else
            {
-             d1 = m32rx_decode (current_cpu, pc, insn >> 16);
-             abufs[0].insn = insn >> 16;
-             abufs[0].idesc = d1;
-             abufs[0].addr = pc; /* FIXME: wip */
-             icount = 1;
+             if ((insn & 0x8000) != 0) /* parallel? */
+               {
+                 /* Yep.  Here's the "interesting" [sic] part.
+                    Only emit before/after handlers if necessary.  */
+                 if (trace_p || profile_p)
+                   {
+                     if (((insn >> 16) & 0xfff0) == 0x10f0)
+                       {
+                         /* FIXME: No need to handle this sequentially if
+                            system calls will be able to execute after second
+                            insn in parallel. ( trap #num || insn ) */
+                         /* insn */
+                         idesc = emit_full16 (current_cpu, pc + 2,
+                                              insn & 0x7fff, sc, 0, 0);
+                         /* trap */
+                         emit_full16 (current_cpu, pc, insn >> 16, sc + 3,
+                                      0, 0);
+                       }
+                     else
+                       {
+                         idesc = emit_full_parallel (current_cpu, pc, insn,
+                                                     sc, trace_p, profile_p);
+                       }
+                     cti_sc = sc + 1;
+                     sc += 6;
+                     max_insns -= 6;
+                   }
+                 else
+                   {
+                     int up_count;
+
+                     if (((insn >> 16) & 0xfff0) == 0x10f0)
+                       {
+                          /* FIXME: No need to handle this sequentially if
+                             system calls will be able to execute after second
+                             insn in parallel. ( trap #num || insn ) */
+                          /* insn */
+                          idesc = emit_16 (current_cpu, pc + 2, insn & 0x7fff,
+                                           sc, 0, 0);
+                          /* trap */
+                          emit_16 (current_cpu, pc, insn >> 16, sc + 1, 0, 0);
+                          up_count = 2;
+                       }
+                     else
+                       {
+                         idesc = emit_parallel (current_cpu, pc, insn, sc, 0);
+                          up_count = 3;
+                       }
+                     cti_sc = sc;
+                     sc += up_count;
+                     max_insns -= up_count;
+                   }
+                 icount += 2;
+                 pc += 4;
+                 if (IDESC_CTI_P (idesc))
+                   {
+                     SET_CTI_VPC (cti_sc);
+                     break;
+                   }
+               }
+             else /* 2 serial 16 bit insns */
+               {
+                 /* Only emit before/after handlers if necessary.  */
+                 if (trace_p || profile_p)
+                   {
+                     idesc = emit_full16 (current_cpu, pc, insn >> 16, sc,
+                                          trace_p, profile_p);
+                     cti_sc = sc + 1;
+                     sc += 3;
+                     max_insns -= 3;
+                   }
+                 else
+                   {
+                     idesc = emit_16 (current_cpu, pc, insn >> 16, sc, 0, 0);
+                     cti_sc = sc;
+                     ++sc;
+                     --max_insns;
+                   }
+                 ++icount;
+                 pc += 2;
+                 if (IDESC_CTI_P (idesc))
+                   {
+                     SET_CTI_VPC (cti_sc);
+                     break;
+                   }
+                 /* While we're guaranteed that there's room to extract the
+                    insn, when single stepping we can't; the pbb must stop
+                    after the first insn.  */
+                 if (max_insns <= 0)
+                   break;
+                 /* Use the same trace/profile address for the 2nd insn.
+                    Saves us having to compute it and they come in pairs
+                    anyway (e.g. can never branch to the 2nd insn).  */
+                 if (trace_p || profile_p)
+                   {
+                     idesc = emit_full16 (current_cpu, pc, insn & 0x7fff, sc,
+                                          trace_p, profile_p);
+                     cti_sc = sc + 1;
+                     sc += 3;
+                     max_insns -= 3;
+                   }
+                 else
+                   {
+                     idesc = emit_16 (current_cpu, pc, insn & 0x7fff, sc, 0, 0);
+                     cti_sc = sc;
+                     ++sc;
+                     --max_insns;
+                   }
+                 ++icount;
+                 pc += 2;
+                 if (IDESC_CTI_P (idesc))
+                   {
+                     SET_CTI_VPC (cti_sc);
+                     break;
+                   }
+               }
            }
        }
     }
 
-  {
-    int icount2 = icount;
-    USI insn = abufs[0].insn;
-    const IDESC *decode = d1;
-    /* decode, par_exec, and insn are refered to by readx.c.  */
-    PAREXEC *par_exec = &pbufs[0];
-    do
-      {
-#define DEFINE_SWITCH
-#include "readx.c"
-
-       decode = d2;
-       insn = abufs[1].insn;
-       ++par_exec;
-      }
-    while (--icount2 != 0);
-  }
+ Finish:
+  SET_INSN_COUNT (icount);
 }
 EOF
 
 ;;
 
-xfull-exec-* | xfast-exec-*)
-
-cat <<EOF
-{
-  SEM_ARG sem_arg = &abufs[0];
-  PAREXEC *par_exec = &pbufs[0];
-  PCADDR new_pc;
+xfull-exec-pbb)
 
-#if 0 /* wip */
-  /* If doing parallel execution, verify insns are in the right pipeline.  */
-  if (icount == 2)
-    {
-      ...
-    }
-#endif
+# Inputs: current_cpu, vpc, FAST_P
+# Outputs: vpc
+# vpc is the virtual program counter.
 
-  m32r_model_init_insn_cycles (current_cpu, 1);
-  TRACE_INSN_INIT (current_cpu, 1);
-  TRACE_INSN (current_cpu, d1->opcode, sem_arg, CPU (h_pc));
-  new_pc = (*d1->sem_full) (current_cpu, sem_arg, par_exec);
-  m32r_model_update_insn_cycles (current_cpu, icount == 1);
-  TRACE_INSN_FINI (current_cpu, icount == 1);
-
-  /* The result of the semantic fn is one of:
-     - next address, branch only
-     - NEW_PC_SKIP, sc/snc insn
-     - NEW_PC_2, 2 byte non-branch non-sc/snc insn
-     - NEW_PC_4, 4 byte non-branch insn
-     */
-
-  /* The tests are ordered to try to favor the more frequent cases, while
-     keeping the over all costs down.  */
-  if (new_pc == NEW_PC_4)
-    CPU (h_pc) += 4;
-  else if (icount == 2)
-    {
-      /* Note that we only get here if doing parallel execution.  */
-
-      if (new_pc == NEW_PC_SKIP)
-       {
-         /* ??? Need generic notion of bypassing an insn for the name of
-            this macro.  Annulled?  On the otherhand such tracing can go
-            in the sc/snc semantic fn.  */
-         ; /*TRACE_INSN_SKIPPED (current_cpu);*/
-         CPU (h_pc) += 4;
-       }
-      else
-        {
-         PCADDR pc2;
-
-         ++sem_arg;
-         ++par_exec;
-         m32r_model_init_insn_cycles (current_cpu, 0);
-         TRACE_INSN_INIT (current_cpu, 0);
-         TRACE_INSN (current_cpu, d2->opcode, sem_arg, CPU (h_pc) + 2);
-         /* pc2 isn't used.  It's assigned a value for debugging.  */
-         pc2 = (*d2->sem_full) (current_cpu, sem_arg, par_exec);
-         m32r_model_update_insn_cycles (current_cpu, 1);
-         TRACE_INSN_FINI (current_cpu, 1);
-
-         if (NEW_PC_BRANCH_P (new_pc))
-           CPU (h_pc) = new_pc;
-         else
-           CPU (h_pc) += 4;
-       }
-
-      /* Update count of parallel insns executed.  */
-      PROFILE_COUNT_PARINSNS (current_cpu);
-    }
-  else if (NEW_PC_BRANCH_P (new_pc))
-    CPU (h_pc) = new_pc;
-  else
-    CPU (h_pc) += 2;
-}
+cat <<EOF
+#define DEFINE_SWITCH
+#include "semx-switch.c"
 EOF
 
 ;;
This page took 0.036567 seconds and 4 git commands to generate.