1 /* rx.c --- opcode semantics for stand-alone RX simulator.
3 Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
6 This file is part of the GNU simulators.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include "opcode/rx.h"
36 static const char * id_names
[] = {
38 "RXO_mov", /* d = s (signed) */
39 "RXO_movbi", /* d = [s,s2] (signed) */
40 "RXO_movbir", /* [s,s2] = d (signed) */
41 "RXO_pushm", /* s..s2 */
42 "RXO_popm", /* s..s2 */
43 "RXO_xchg", /* s <-> d */
44 "RXO_stcc", /* d = s if cond(s2) */
45 "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
47 /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
48 that d may be "None". */
61 "RXO_adc", /* d = d + s + carry */
62 "RXO_sbb", /* d = d - s - ~carry */
63 "RXO_abs", /* d = |s| */
64 "RXO_max", /* d = max(d,s) */
65 "RXO_min", /* d = min(d,s) */
66 "RXO_emul", /* d:64 = d:32 * s */
67 "RXO_emulu", /* d:64 = d:32 * s (unsigned) */
69 "RXO_rolc", /* d <<= 1 through carry */
70 "RXO_rorc", /* d >>= 1 through carry*/
71 "RXO_rotl", /* d <<= #s without carry */
72 "RXO_rotr", /* d >>= #s without carry*/
73 "RXO_revw", /* d = revw(s) */
74 "RXO_revl", /* d = revl(s) */
75 "RXO_branch", /* pc = d if cond(s) */
76 "RXO_branchrel",/* pc += d if cond(s) */
77 "RXO_jsr", /* pc = d */
78 "RXO_jsrrel", /* pc += d */
104 "RXO_sat", /* sat(d) */
107 "RXO_fadd", /* d op= s */
116 "RXO_bset", /* d |= (1<<s) */
117 "RXO_bclr", /* d &= ~(1<<s) */
118 "RXO_btst", /* s & (1<<s2) */
119 "RXO_bnot", /* d ^= (1<<s) */
120 "RXO_bmcc", /* d<s> = cond(s2) */
122 "RXO_clrpsw", /* flag index in d */
123 "RXO_setpsw", /* flag index in d */
124 "RXO_mvtipl", /* new IPL in s */
128 "RXO_rtd", /* undocumented */
130 "RXO_dbt", /* undocumented */
131 "RXO_int", /* vector id in s */
135 "RXO_sccnd", /* d = cond(s) ? 1 : 0 */
138 static const char * optype_names
[] = {
140 "#Imm", /* #addend */
142 "[Rn]", /* [Rn + addend] */
145 " cc ", /* eq, gtu, etc */
146 "Flag", /* [UIOSZC] */
147 "RbRi" /* [Rb + scale * Ri] */
150 #define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
151 #define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
154 static unsigned long long benchmark_start_cycle
;
155 static unsigned long long benchmark_end_cycle
;
157 static int op_cache
[N_RXT
][N_RXT
][N_RXT
];
158 static int op_cache_rev
[N_MAP
];
159 static int op_cache_idx
= 0;
162 op_lookup (int a
, int b
, int c
)
164 if (op_cache
[a
][b
][c
])
165 return op_cache
[a
][b
][c
];
167 if (op_cache_idx
>= N_MAP
)
169 printf("op_cache_idx exceeds %d\n", N_MAP
);
172 op_cache
[a
][b
][c
] = op_cache_idx
;
173 op_cache_rev
[op_cache_idx
] = (a
<<8) | (b
<<4) | c
;
178 op_cache_string (int map
)
181 static char cb
[5][20];
184 map
= op_cache_rev
[map
];
189 sprintf(cb
[ci
], "%s %s %s", optype_names
[a
], optype_names
[b
], optype_names
[c
]);
193 static unsigned long long cycles_per_id
[N_RXO
][N_MAP
];
194 static unsigned long long times_per_id
[N_RXO
][N_MAP
];
195 static unsigned long long memory_stalls
;
196 static unsigned long long register_stalls
;
197 static unsigned long long branch_stalls
;
198 static unsigned long long branch_alignment_stalls
;
199 static unsigned long long fast_returns
;
201 static unsigned long times_per_pair
[N_RXO
][N_MAP
][N_RXO
][N_MAP
];
202 static int prev_opcode_id
= RXO_unknown
;
209 #endif /* CYCLE_STATS */
212 #ifdef CYCLE_ACCURATE
214 static int new_rt
= -1;
216 /* Number of cycles to add if an insn spans an 8-byte boundary. */
217 static int branch_alignment_penalty
= 0;
221 static int running_benchmark
= 1;
223 #define tprintf if (trace && running_benchmark) printf
225 jmp_buf decode_jmp_buf
;
226 unsigned int rx_cycles
= 0;
228 #ifdef CYCLE_ACCURATE
229 /* If nonzero, memory was read at some point and cycle latency might
231 static int memory_source
= 0;
232 /* If nonzero, memory was written and extra cycles might be
234 static int memory_dest
= 0;
237 cycles (int throughput
)
239 tprintf("%d cycles\n", throughput
);
240 regs
.cycle_count
+= throughput
;
243 /* Number of execution (E) cycles the op uses. For memory sources, we
244 include the load micro-op stall as two extra E cycles. */
245 #define E(c) cycles (memory_source ? c + 2 : c)
246 #define E1 cycles (1)
247 #define E2 cycles (2)
248 #define EBIT cycles (memory_source ? 2 : 1)
250 /* Check to see if a read latency must be applied for a given register. */
254 tprintf("register %d load stall\n", r); \
255 regs.cycle_count ++; \
256 STATS(register_stalls ++); \
263 tprintf ("Rt now %d\n", r); \
267 #else /* !CYCLE_ACCURATE */
277 #endif /* else CYCLE_ACCURATE */
/* Number of bytes transferred for each operand-size code.  Indexed by
   the RX_* size enum from opcode/rx.h.
   NOTE(review): index->size mapping (any/byte/ubyte/sbyte/word/uword/
   sword/3byte/long) inferred from the enum order in opcode/rx.h --
   confirm against that header.  */
static int size2bytes[] = {
  4, 1, 1, 1, 2, 2, 2, 3, 4
};
287 #define rx_abort() _rx_abort(__FILE__, __LINE__)
/* Report an internal simulator failure at FILE:LINE and terminate.
   Only the basename of FILE is printed.  Invoked via the rx_abort()
   macro above, which supplies __FILE__/__LINE__.  */
static void
_rx_abort (const char *file, int line)
{
  /* Strip any leading directory components from the path.  */
  if (strrchr (file, '/'))
    file = strrchr (file, '/') + 1;
  fprintf (stderr, "abort at %s:%d\n", file, line);
  abort ();
}
297 static unsigned char *get_byte_base
;
298 static RX_Opcode_Decoded
**decode_cache_base
;
299 static SI get_byte_page
;
305 decode_cache_base
= 0;
310 maybe_get_mem_page (SI tpc
)
312 if (((tpc
^ get_byte_page
) & NONPAGE_MASK
) || enable_counting
)
314 get_byte_page
= tpc
& NONPAGE_MASK
;
315 get_byte_base
= rx_mem_ptr (get_byte_page
, MPA_READING
) - get_byte_page
;
316 decode_cache_base
= rx_mem_decode_cache (get_byte_page
) - get_byte_page
;
320 /* This gets called a *lot* so optimize it. */
322 rx_get_byte (void *vdata
)
324 RX_Data
*rx_data
= (RX_Data
*)vdata
;
325 SI tpc
= rx_data
->dpc
;
327 /* See load.c for an explanation of this. */
331 maybe_get_mem_page (tpc
);
334 return get_byte_base
[tpc
];
338 get_op (const RX_Opcode_Decoded
*rd
, int i
)
340 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
345 case RX_Operand_None
:
348 case RX_Operand_Immediate
: /* #addend */
351 case RX_Operand_Register
: /* Rn */
353 rv
= get_reg (o
->reg
);
356 case RX_Operand_Predec
: /* [-Rn] */
357 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
359 case RX_Operand_Postinc
: /* [Rn+] */
360 case RX_Operand_Indirect
: /* [Rn + addend] */
361 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
362 #ifdef CYCLE_ACCURATE
364 if (o
->type
== RX_Operand_TwoReg
)
367 if (regs
.m2m
== M2M_BOTH
)
369 tprintf("src memory stall\n");
380 if (o
->type
== RX_Operand_TwoReg
)
381 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
383 addr
= get_reg (o
->reg
) + o
->addend
;
390 case RX_Byte
: /* undefined extension */
393 rv
= mem_get_qi (addr
);
396 case RX_Word
: /* undefined extension */
399 rv
= mem_get_hi (addr
);
403 rv
= mem_get_psi (addr
);
407 rv
= mem_get_si (addr
);
411 if (o
->type
== RX_Operand_Postinc
)
412 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
416 case RX_Operand_Condition
: /* eq, gtu, etc */
417 return condition_true (o
->reg
);
419 case RX_Operand_Flag
: /* [UIOSZC] */
420 return (regs
.r_psw
& (1 << o
->reg
)) ? 1 : 0;
423 /* if we've gotten here, we need to clip/extend the value according
430 case RX_Byte
: /* undefined extension */
431 rv
|= 0xdeadbe00; /* keep them honest */
439 rv
= sign_ext (rv
, 8);
442 case RX_Word
: /* undefined extension */
443 rv
|= 0xdead0000; /* keep them honest */
451 rv
= sign_ext (rv
, 16);
465 put_op (const RX_Opcode_Decoded
*rd
, int i
, int v
)
467 const RX_Opcode_Operand
*o
= rd
->op
+ i
;
473 if (o
->type
!= RX_Operand_Register
)
477 case RX_Byte
: /* undefined extension */
478 v
|= 0xdeadbe00; /* keep them honest */
489 case RX_Word
: /* undefined extension */
490 v
|= 0xdead0000; /* keep them honest */
498 v
= sign_ext (v
, 16);
511 case RX_Operand_None
:
512 /* Opcodes like TST and CMP use this. */
515 case RX_Operand_Immediate
: /* #addend */
516 case RX_Operand_Condition
: /* eq, gtu, etc */
519 case RX_Operand_Register
: /* Rn */
524 case RX_Operand_Predec
: /* [-Rn] */
525 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
527 case RX_Operand_Postinc
: /* [Rn+] */
528 case RX_Operand_Indirect
: /* [Rn + addend] */
529 case RX_Operand_TwoReg
: /* [Rn + scale * R2] */
531 #ifdef CYCLE_ACCURATE
532 if (regs
.m2m
== M2M_BOTH
)
534 tprintf("dst memory stall\n");
544 if (o
->type
== RX_Operand_TwoReg
)
545 addr
= get_reg (o
->reg
) * size2bytes
[rd
->size
] + get_reg (rd
->op
[2].reg
);
547 addr
= get_reg (o
->reg
) + o
->addend
;
554 case RX_Byte
: /* undefined extension */
557 mem_put_qi (addr
, v
);
560 case RX_Word
: /* undefined extension */
563 mem_put_hi (addr
, v
);
567 mem_put_psi (addr
, v
);
571 mem_put_si (addr
, v
);
575 if (o
->type
== RX_Operand_Postinc
)
576 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
580 case RX_Operand_Flag
: /* [UIOSZC] */
582 regs
.r_psw
|= (1 << o
->reg
);
584 regs
.r_psw
&= ~(1 << o
->reg
);
589 #define PD(x) put_op (opcode, 0, x)
590 #define PS(x) put_op (opcode, 1, x)
591 #define PS2(x) put_op (opcode, 2, x)
592 #define GD() get_op (opcode, 0)
593 #define GS() get_op (opcode, 1)
594 #define GS2() get_op (opcode, 2)
595 #define DSZ() size2bytes[opcode->op[0].size]
596 #define SSZ() size2bytes[opcode->op[0].size]
597 #define S2SZ() size2bytes[opcode->op[0].size]
599 /* "Universal" sources. */
600 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
601 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
606 int rsp
= get_reg (sp
);
609 mem_put_si (rsp
, val
);
612 /* Just like the above, but tag the memory as "pushed pc" so if anyone
613 tries to write to it, it will cause an error. */
617 int rsp
= get_reg (sp
);
620 mem_put_si (rsp
, val
);
621 mem_set_content_range (rsp
, rsp
+3, MC_PUSHED_PC
);
628 int rsp
= get_reg (sp
);
629 rv
= mem_get_si (rsp
);
639 int rsp
= get_reg (sp
);
640 if (mem_get_content_type (rsp
) != MC_PUSHED_PC
)
641 execution_error (SIM_ERR_CORRUPT_STACK
, rsp
);
642 rv
= mem_get_si (rsp
);
643 mem_set_content_range (rsp
, rsp
+3, MC_UNINIT
);
649 #define MATH_OP(vop,c) \
653 ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
654 tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
655 ma = sign_ext (uma, DSZ() * 8); \
656 mb = sign_ext (umb, DSZ() * 8); \
657 sll = (long long) ma vop (long long) mb vop c; \
658 tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
659 set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
664 #define LOGIC_OP(vop) \
669 tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
675 #define SHIFT_OP(val, type, count, OP, carry_mask) \
680 tprintf("%lld " #OP " %d\n", val, count); \
681 for (i = 0; i < count; i ++) \
683 c = val & carry_mask; \
687 set_oszc (val, 4, c); \
713 fop_fadd (fp_t s1
, fp_t s2
, fp_t
*d
)
715 *d
= rxfp_add (s1
, s2
);
720 fop_fmul (fp_t s1
, fp_t s2
, fp_t
*d
)
722 *d
= rxfp_mul (s1
, s2
);
727 fop_fdiv (fp_t s1
, fp_t s2
, fp_t
*d
)
729 *d
= rxfp_div (s1
, s2
);
734 fop_fsub (fp_t s1
, fp_t s2
, fp_t
*d
)
736 *d
= rxfp_sub (s1
, s2
);
740 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
741 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
744 return do_fp_exception (opcode_pc)
746 #define FLOAT_OP(func) \
753 do_store = fop_##func (fa, fb, &fc); \
754 tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
759 if ((fc & 0x80000000UL) != 0) \
761 if ((fc & 0x7fffffffUL) == 0) \
763 set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
766 #define carry (FLAG_C ? 1 : 0)
772 } exception_info
[] = {
773 { 0xFFFFFFD0UL
, "priviledged opcode", SIGILL
},
774 { 0xFFFFFFD4UL
, "access violation", SIGSEGV
},
775 { 0xFFFFFFDCUL
, "undefined opcode", SIGILL
},
776 { 0xFFFFFFE4UL
, "floating point", SIGFPE
}
778 #define EX_PRIVILEDGED 0
780 #define EX_UNDEFINED 2
781 #define EX_FLOATING 3
782 #define EXCEPTION(n) \
783 return generate_exception (n, opcode_pc)
785 #define PRIVILEDGED() \
787 EXCEPTION (EX_PRIVILEDGED)
790 generate_exception (unsigned long type
, SI opcode_pc
)
792 SI old_psw
, old_pc
, new_pc
;
794 new_pc
= mem_get_si (exception_info
[type
].vaddr
);
795 /* 0x00020000 is the value used to initialise the known
796 exception vectors (see rx.ld), but it is a reserved
797 area of memory so do not try to access it, and if the
798 value has not been changed by the program then the
799 vector has not been installed. */
800 if (new_pc
== 0 || new_pc
== 0x00020000)
803 return RX_MAKE_STOPPED (exception_info
[type
].signal
);
805 fprintf(stderr
, "Unhandled %s exception at pc = %#lx\n",
806 exception_info
[type
].str
, (unsigned long) opcode_pc
);
807 if (type
== EX_FLOATING
)
809 int mask
= FPPENDING ();
810 fprintf (stderr
, "Pending FP exceptions:");
811 if (mask
& FPSWBITS_FV
)
812 fprintf(stderr
, " Invalid");
813 if (mask
& FPSWBITS_FO
)
814 fprintf(stderr
, " Overflow");
815 if (mask
& FPSWBITS_FZ
)
816 fprintf(stderr
, " Division-by-zero");
817 if (mask
& FPSWBITS_FU
)
818 fprintf(stderr
, " Underflow");
819 if (mask
& FPSWBITS_FX
)
820 fprintf(stderr
, " Inexact");
821 if (mask
& FPSWBITS_CE
)
822 fprintf(stderr
, " Unimplemented");
823 fprintf(stderr
, "\n");
825 return RX_MAKE_EXITED (1);
828 tprintf ("Triggering %s exception\n", exception_info
[type
].str
);
830 old_psw
= regs
.r_psw
;
831 regs
.r_psw
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
836 return RX_MAKE_STEPPED ();
840 generate_access_exception (void)
844 rv
= generate_exception (EX_ACCESS
, regs
.r_pc
);
846 longjmp (decode_jmp_buf
, rv
);
850 do_fp_exception (unsigned long opcode_pc
)
853 EXCEPTION (EX_FLOATING
);
854 return RX_MAKE_STEPPED ();
858 op_is_memory (const RX_Opcode_Decoded
*rd
, int i
)
860 switch (rd
->op
[i
].type
)
862 case RX_Operand_Predec
:
863 case RX_Operand_Postinc
:
864 case RX_Operand_Indirect
:
870 #define OM(i) op_is_memory (opcode, i)
872 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
877 unsigned int uma
=0, umb
=0;
880 unsigned long long ll
;
882 unsigned long opcode_pc
;
884 const RX_Opcode_Decoded
*opcode
;
886 unsigned long long prev_cycle_count
;
888 #ifdef CYCLE_ACCURATE
893 prev_cycle_count
= regs
.cycle_count
;
896 #ifdef CYCLE_ACCURATE
903 maybe_get_mem_page (regs
.r_pc
);
905 opcode_pc
= regs
.r_pc
;
907 /* Note that we don't word-swap this point, there's no point. */
908 if (decode_cache_base
[opcode_pc
] == NULL
)
910 RX_Opcode_Decoded
*opcode_w
;
911 rx_data
.dpc
= opcode_pc
;
912 opcode_w
= decode_cache_base
[opcode_pc
] = calloc (1, sizeof (RX_Opcode_Decoded
));
913 opcode_size
= rx_decode_opcode (opcode_pc
, opcode_w
,
914 rx_get_byte
, &rx_data
);
919 opcode
= decode_cache_base
[opcode_pc
];
920 opcode_size
= opcode
->n_bytes
;
923 #ifdef CYCLE_ACCURATE
924 if (branch_alignment_penalty
)
926 if ((regs
.r_pc
^ (regs
.r_pc
+ opcode_size
- 1)) & ~7)
928 tprintf("1 cycle branch alignment penalty\n");
929 cycles (branch_alignment_penalty
);
931 branch_alignment_stalls
++;
934 branch_alignment_penalty
= 0;
938 regs
.r_pc
+= opcode_size
;
940 rx_flagmask
= opcode
->flags_s
;
941 rx_flagand
= ~(int)opcode
->flags_0
;
942 rx_flagor
= opcode
->flags_1
;
948 tprintf("|%lld| = ", sll
);
951 tprintf("%lld\n", sll
);
972 if (opcode
->op
[0].type
== RX_Operand_Register
)
984 if (opcode
->op
[0].type
== RX_Operand_Register
)
999 if (opcode
->op
[0].type
== RX_Operand_Register
)
1009 if (opcode
->op
[1].type
== RX_Operand_None
|| GS())
1011 #ifdef CYCLE_ACCURATE
1012 SI old_pc
= regs
.r_pc
;
1016 #ifdef CYCLE_ACCURATE
1017 delta
= regs
.r_pc
- old_pc
;
1018 if (delta
>= 0 && delta
< 16
1021 tprintf("near forward branch bonus\n");
1027 branch_alignment_penalty
= 1;
1034 #ifdef CYCLE_ACCURATE
1045 #ifdef CYCLE_ACCURATE
1046 /* Note: specs say 3, chip says 2. */
1047 if (delta
>= 0 && delta
< 16
1050 tprintf("near forward branch bonus\n");
1056 branch_alignment_penalty
= 1;
1063 #ifdef CYCLE_ACCURATE
1071 int old_psw
= regs
.r_psw
;
1073 DO_RETURN (RX_MAKE_HIT_BREAK ());
1074 if (regs
.r_intb
== 0)
1076 tprintf("BREAK hit, no vector table.\n");
1077 DO_RETURN (RX_MAKE_EXITED(1));
1079 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1082 regs
.r_pc
= mem_get_si (regs
.r_intb
);
1090 if (opcode
->op
[0].type
== RX_Operand_Register
)
1102 if (opcode
->op
[1].type
== RX_Operand_Register
)
1106 umb
= ma
& (1 << mb
);
1107 set_zc (! umb
, umb
);
1112 v
= 1 << opcode
->op
[0].reg
;
1121 case RXO_div
: /* d = d / s */
1124 tprintf("%d / %d = ", mb
, ma
);
1125 if (ma
== 0 || (ma
== -1 && (unsigned int) mb
== 0x80000000))
1128 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1134 set_flags (FLAGBIT_O
, 0);
1137 /* Note: spec says 3 to 22 cycles, we are pessimistic. */
1141 case RXO_divu
: /* d = d / s */
1144 tprintf("%u / %u = ", umb
, uma
);
1148 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1154 set_flags (FLAGBIT_O
, 0);
1157 /* Note: spec says 2 to 20 cycles, we are pessimistic. */
1164 sll
= (long long)ma
* (long long)mb
;
1165 tprintf("%d * %d = %lld\n", ma
, mb
, sll
);
1166 put_reg (opcode
->op
[0].reg
, sll
);
1167 put_reg (opcode
->op
[0].reg
+ 1, sll
>> 32);
1174 ll
= (long long)uma
* (long long)umb
;
1175 tprintf("%#x * %#x = %#llx\n", uma
, umb
, ll
);
1176 put_reg (opcode
->op
[0].reg
, ll
);
1177 put_reg (opcode
->op
[0].reg
+ 1, ll
>> 32);
1207 regs
.r_psw
= regs
.r_bpsw
;
1208 regs
.r_pc
= regs
.r_bpc
;
1209 #ifdef CYCLE_ACCURATE
1210 regs
.fast_return
= 0;
1223 mb
= rxfp_ftoi (ma
, FPRM_ZERO
);
1226 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1235 DO_RETURN (rx_syscall (regs
.r
[5]));
1239 int old_psw
= regs
.r_psw
;
1240 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1243 regs
.r_pc
= mem_get_si (regs
.r_intb
+ 4 * v
);
1251 mb
= rxfp_itof (ma
, regs
.r_fpsw
);
1253 tprintf("(float) %d = %x\n", ma
, mb
);
1262 #ifdef CYCLE_ACCURATE
1267 #ifdef CYCLE_ACCURATE
1268 regs
.link_register
= regs
.r_pc
;
1270 pushpc (get_reg (pc
));
1271 if (opcode
->id
== RXO_jsrrel
)
1273 #ifdef CYCLE_ACCURATE
1274 delta
= v
- regs
.r_pc
;
1277 #ifdef CYCLE_ACCURATE
1278 /* Note: docs say 3, chip says 2 */
1279 if (delta
>= 0 && delta
< 16)
1281 tprintf ("near forward jsr bonus\n");
1286 branch_alignment_penalty
= 1;
1289 regs
.fast_return
= 1;
1295 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1297 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1302 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1304 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1331 if (opcode
->op
[0].type
== RX_Operand_Register
1332 && opcode
->op
[0].reg
== 16 /* PSW */)
1334 /* Special case, LDC and POPC can't ever modify PM. */
1335 int pm
= regs
.r_psw
& FLAGBIT_PM
;
1340 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1346 /* various things can't be changed in user mode. */
1347 if (opcode
->op
[0].type
== RX_Operand_Register
)
1348 if (opcode
->op
[0].reg
== 32)
1350 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1351 v
|= regs
.r_psw
& (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1353 if (opcode
->op
[0].reg
== 34 /* ISP */
1354 || opcode
->op
[0].reg
== 37 /* BPSW */
1355 || opcode
->op
[0].reg
== 39 /* INTB */
1356 || opcode
->op
[0].reg
== 38 /* VCT */)
1357 /* These are ignored. */
1367 #ifdef CYCLE_ACCURATE
1368 if ((opcode
->op
[0].type
== RX_Operand_Predec
1369 && opcode
->op
[1].type
== RX_Operand_Register
)
1370 || (opcode
->op
[0].type
== RX_Operand_Postinc
1371 && opcode
->op
[1].type
== RX_Operand_Register
))
1373 /* Special case: push reg doesn't cause a memory stall. */
1375 tprintf("push special case\n");
1394 ll
= (unsigned long long) US1() * (unsigned long long) v
;
1401 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v
>> 16);
1403 put_reg64 (acc64
, ll
);
1409 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(v
);
1411 put_reg64 (acc64
, ll
);
1416 PD (get_reg (acchi
));
1421 PD (get_reg (acclo
));
1426 PD (get_reg (accmi
));
1431 put_reg (acchi
, GS ());
1436 put_reg (acclo
, GS ());
1441 regs
.r_psw
&= ~ FLAGBITS_IPL
;
1442 regs
.r_psw
|= (GS () << FLAGSHIFT_IPL
) & FLAGBITS_IPL
;
1457 /* POPM cannot pop R0 (sp). */
1458 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1459 EXCEPTION (EX_UNDEFINED
);
1460 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1462 regs
.r_pc
= opcode_pc
;
1463 DO_RETURN (RX_MAKE_STOPPED (SIGILL
));
1465 for (v
= opcode
->op
[1].reg
; v
<= opcode
->op
[2].reg
; v
++)
1469 put_reg (v
, pop ());
1474 /* PUSHM cannot push R0 (sp). */
1475 if (opcode
->op
[1].reg
== 0 || opcode
->op
[2].reg
== 0)
1476 EXCEPTION (EX_UNDEFINED
);
1477 if (opcode
->op
[1].reg
>= opcode
->op
[2].reg
)
1479 regs
.r_pc
= opcode_pc
;
1480 return RX_MAKE_STOPPED (SIGILL
);
1482 for (v
= opcode
->op
[2].reg
; v
>= opcode
->op
[1].reg
; v
--)
1487 cycles (opcode
->op
[2].reg
- opcode
->op
[1].reg
+ 1);
1491 ll
= get_reg64 (acc64
) << GS ();
1492 ll
+= 0x80000000ULL
;
1493 if ((signed long long)ll
> (signed long long)0x00007fff00000000ULL
)
1494 ll
= 0x00007fff00000000ULL
;
1495 else if ((signed long long)ll
< (signed long long)0xffff800000000000ULL
)
1496 ll
= 0xffff800000000000ULL
;
1498 ll
&= 0xffffffff00000000ULL
;
1499 put_reg64 (acc64
, ll
);
1505 regs
.r_pc
= poppc ();
1506 regs
.r_psw
= poppc ();
1508 regs
.r_psw
|= FLAGBIT_U
;
1509 #ifdef CYCLE_ACCURATE
1510 regs
.fast_return
= 0;
1517 umb
= (((uma
>> 24) & 0xff)
1518 | ((uma
>> 8) & 0xff00)
1519 | ((uma
<< 8) & 0xff0000)
1520 | ((uma
<< 24) & 0xff000000UL
));
1527 umb
= (((uma
>> 8) & 0x00ff00ff)
1528 | ((uma
<< 8) & 0xff00ff00UL
));
1536 #ifdef CYCLE_ACCURATE
1540 while (regs
.r
[3] != 0)
1544 switch (opcode
->size
)
1547 ma
= mem_get_si (regs
.r
[1]);
1548 mb
= mem_get_si (regs
.r
[2]);
1553 ma
= sign_ext (mem_get_hi (regs
.r
[1]), 16);
1554 mb
= sign_ext (mem_get_hi (regs
.r
[2]), 16);
1559 ma
= sign_ext (mem_get_qi (regs
.r
[1]), 8);
1560 mb
= sign_ext (mem_get_qi (regs
.r
[2]), 8);
1567 /* We do the multiply as a signed value. */
1568 sll
= (long long)ma
* (long long)mb
;
1569 tprintf(" %016llx = %d * %d\n", sll
, ma
, mb
);
1570 /* but we do the sum as unsigned, while sign extending the operands. */
1571 tmp
= regs
.r
[4] + (sll
& 0xffffffffUL
);
1572 regs
.r
[4] = tmp
& 0xffffffffUL
;
1575 tmp
+= regs
.r
[5] + (sll
& 0xffffffffUL
);
1576 regs
.r
[5] = tmp
& 0xffffffffUL
;
1579 tmp
+= regs
.r
[6] + (sll
& 0xffffffffUL
);
1580 regs
.r
[6] = tmp
& 0xffffffffUL
;
1581 tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1582 (unsigned long) regs
.r
[6],
1583 (unsigned long) regs
.r
[5],
1584 (unsigned long) regs
.r
[4]);
1588 if (regs
.r
[6] & 0x00008000)
1589 regs
.r
[6] |= 0xffff0000UL
;
1591 regs
.r
[6] &= 0x0000ffff;
1592 ma
= (regs
.r
[6] & 0x80000000UL
) ? FLAGBIT_S
: 0;
1593 if (regs
.r
[6] != 0 && regs
.r
[6] != 0xffffffffUL
)
1594 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
| FLAGBIT_O
);
1596 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
);
1597 #ifdef CYCLE_ACCURATE
1598 switch (opcode
->size
)
1601 cycles (6 + 4 * tx
);
1604 cycles (6 + 5 * (tx
/ 2) + 4 * (tx
% 2));
1607 cycles (6 + 7 * (tx
/ 4) + 4 * (tx
% 4));
1617 ma
= v
& 0x80000000UL
;
1629 uma
|= (carry
? 0x80000000UL
: 0);
1630 set_szc (uma
, 4, mb
);
1640 uma
= (uma
<< mb
) | (uma
>> (32-mb
));
1643 set_szc (uma
, 4, mb
);
1653 uma
= (uma
>> mb
) | (uma
<< (32-mb
));
1654 mb
= uma
& 0x80000000;
1656 set_szc (uma
, 4, mb
);
1664 mb
= rxfp_ftoi (ma
, regs
.r_fpsw
);
1667 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1674 #ifdef CYCLE_ACCURATE
1677 regs
.r_pc
= poppc ();
1678 #ifdef CYCLE_ACCURATE
1679 /* Note: specs say 5, chip says 3. */
1680 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1685 tprintf("fast return bonus\n");
1689 regs
.fast_return
= 0;
1690 branch_alignment_penalty
= 1;
1696 if (opcode
->op
[2].type
== RX_Operand_Register
)
1699 /* RTSD cannot pop R0 (sp). */
1700 put_reg (0, get_reg (0) + GS() - (opcode
->op
[0].reg
-opcode
->op
[2].reg
+1)*4);
1701 if (opcode
->op
[2].reg
== 0)
1702 EXCEPTION (EX_UNDEFINED
);
1703 #ifdef CYCLE_ACCURATE
1704 tx
= opcode
->op
[0].reg
- opcode
->op
[2].reg
+ 1;
1706 for (i
= opcode
->op
[2].reg
; i
<= opcode
->op
[0].reg
; i
++)
1709 put_reg (i
, pop ());
1714 #ifdef CYCLE_ACCURATE
1717 put_reg (0, get_reg (0) + GS());
1719 put_reg (pc
, poppc());
1720 #ifdef CYCLE_ACCURATE
1721 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1723 tprintf("fast return bonus\n");
1727 cycles (tx
< 3 ? 3 : tx
+ 1);
1731 cycles (tx
< 5 ? 5 : tx
+ 1);
1733 regs
.fast_return
= 0;
1734 branch_alignment_penalty
= 1;
1739 if (FLAG_O
&& FLAG_S
)
1741 else if (FLAG_O
&& ! FLAG_S
)
1747 MATH_OP (-, ! carry
);
1759 #ifdef CYCLE_ACCURATE
1762 while (regs
.r
[3] != 0)
1764 uma
= mem_get_qi (regs
.r
[1] ++);
1765 umb
= mem_get_qi (regs
.r
[2] ++);
1767 if (uma
!= umb
|| uma
== 0)
1773 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1774 cycles (2 + 4 * (tx
/ 4) + 4 * (tx
% 4));
1778 v
= 1 << opcode
->op
[0].reg
;
1789 #ifdef CYCLE_ACCURATE
1794 uma
= mem_get_qi (regs
.r
[2] --);
1795 mem_put_qi (regs
.r
[1]--, uma
);
1798 #ifdef CYCLE_ACCURATE
1800 cycles (6 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1802 cycles (2 + 3 * (tx
% 4));
1808 #ifdef CYCLE_ACCURATE
1813 uma
= mem_get_qi (regs
.r
[2] ++);
1814 mem_put_qi (regs
.r
[1]++, uma
);
1817 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1821 #ifdef CYCLE_ACCURATE
1824 while (regs
.r
[3] != 0)
1826 uma
= mem_get_qi (regs
.r
[2] ++);
1827 mem_put_qi (regs
.r
[1]++, uma
);
1832 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1835 case RXO_shar
: /* d = ma >> mb */
1836 SHIFT_OP (sll
, int, mb
, >>=, 1);
1840 case RXO_shll
: /* d = ma << mb */
1841 SHIFT_OP (ll
, int, mb
, <<=, 0x80000000UL
);
1845 case RXO_shlr
: /* d = ma >> mb */
1846 SHIFT_OP (ll
, unsigned int, mb
, >>=, 1);
1852 #ifdef CYCLE_ACCURATE
1855 switch (opcode
->size
)
1858 while (regs
.r
[3] != 0)
1860 mem_put_si (regs
.r
[1], regs
.r
[2]);
1867 while (regs
.r
[3] != 0)
1869 mem_put_hi (regs
.r
[1], regs
.r
[2]);
1873 cycles (2 + (int)(tx
/ 2) + tx
% 2);
1876 while (regs
.r
[3] != 0)
1878 mem_put_qi (regs
.r
[1], regs
.r
[2]);
1882 cycles (2 + (int)(tx
/ 4) + tx
% 4);
1897 regs
.r_psw
|= FLAGBIT_I
;
1898 DO_RETURN (RX_MAKE_STOPPED(0));
1906 #ifdef CYCLE_ACCURATE
1914 switch (opcode
->size
)
1918 while (regs
.r
[3] != 0)
1921 umb
= mem_get_si (get_reg (1));
1926 cycles (3 + 3 * tx
);
1929 uma
= get_reg (2) & 0xffff;
1930 while (regs
.r
[3] != 0)
1933 umb
= mem_get_hi (get_reg (1));
1938 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
1941 uma
= get_reg (2) & 0xff;
1942 while (regs
.r
[3] != 0)
1945 umb
= mem_get_qi (regs
.r
[1]);
1950 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1958 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1963 #ifdef CYCLE_ACCURATE
1968 switch (opcode
->size
)
1972 while (regs
.r
[3] != 0)
1975 umb
= mem_get_si (get_reg (1));
1980 cycles (3 + 3 * tx
);
1983 uma
= get_reg (2) & 0xffff;
1984 while (regs
.r
[3] != 0)
1987 umb
= mem_get_hi (get_reg (1));
1992 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
1995 uma
= get_reg (2) & 0xff;
1996 while (regs
.r
[3] != 0)
1999 umb
= mem_get_qi (regs
.r
[1]);
2004 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2012 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2017 regs
.r_psw
|= FLAGBIT_I
;
2018 DO_RETURN (RX_MAKE_STOPPED(0));
2021 #ifdef CYCLE_ACCURATE
2024 v
= GS (); /* This is the memory operand, if any. */
2025 PS (GD ()); /* and this may change the address register. */
2028 #ifdef CYCLE_ACCURATE
2029 /* all M cycles happen during xchg's cycles. */
2040 EXCEPTION (EX_UNDEFINED
);
2043 #ifdef CYCLE_ACCURATE
2046 regs
.m2m
|= M2M_SRC
;
2048 regs
.m2m
|= M2M_DST
;
2055 if (prev_cycle_count
== regs
.cycle_count
)
2057 printf("Cycle count not updated! id %s\n", id_names
[opcode
->id
]);
2063 if (running_benchmark
)
2065 int omap
= op_lookup (opcode
->op
[0].type
, opcode
->op
[1].type
, opcode
->op
[2].type
);
2068 cycles_per_id
[opcode
->id
][omap
] += regs
.cycle_count
- prev_cycle_count
;
2069 times_per_id
[opcode
->id
][omap
] ++;
2071 times_per_pair
[prev_opcode_id
][po0
][opcode
->id
][omap
] ++;
2073 prev_opcode_id
= opcode
->id
;
2078 return RX_MAKE_STEPPED ();
/* Zero all benchmark statistics and (re)start benchmark collection,
   recording the current cycle count as the benchmark start point.
   NOTE(review): the resets of memory_stalls, branch_stalls and
   fast_returns, and the CYCLE_STATS guard, were reconstructed from the
   counters declared above and printed by the stats code -- confirm.  */
void
reset_pipeline_stats (void)
{
#ifdef CYCLE_STATS
  memset (cycles_per_id, 0, sizeof(cycles_per_id));
  memset (times_per_id, 0, sizeof(times_per_id));
  memory_stalls = 0;
  register_stalls = 0;
  branch_stalls = 0;
  branch_alignment_stalls = 0;
  fast_returns = 0;
  memset (times_per_pair, 0, sizeof(times_per_pair));
  running_benchmark = 1;
  benchmark_start_cycle = regs.cycle_count;
#endif
}
/* Stop benchmark collection and record the current cycle count as the
   benchmark end point, so pipeline_stats () can report the delta.
   NOTE(review): CYCLE_STATS guard reconstructed to match the guarded
   declarations of the counters above -- confirm.  */
void
halt_pipeline_stats (void)
{
#ifdef CYCLE_STATS
  running_benchmark = 0;
  benchmark_end_cycle = regs.cycle_count;
#endif
}
2107 pipeline_stats (void)
2114 #ifdef CYCLE_ACCURATE
2117 printf ("cycles: %llu\n", regs
.cycle_count
);
2121 printf ("cycles: %13s\n", comma (regs
.cycle_count
));
2125 if (benchmark_start_cycle
)
2126 printf ("bmark: %13s\n", comma (benchmark_end_cycle
- benchmark_start_cycle
));
2129 for (i
= 0; i
< N_RXO
; i
++)
2130 for (o1
= 0; o1
< N_MAP
; o1
++)
2131 if (times_per_id
[i
][o1
])
2132 printf("%13s %13s %7.2f %s %s\n",
2133 comma (cycles_per_id
[i
][o1
]),
2134 comma (times_per_id
[i
][o1
]),
2135 (double)cycles_per_id
[i
][o1
] / times_per_id
[i
][o1
],
2136 op_cache_string(o1
),
2140 for (p
= 0; p
< N_RXO
; p
++)
2141 for (p1
= 0; p1
< N_MAP
; p1
++)
2142 for (i
= 0; i
< N_RXO
; i
++)
2143 for (o1
= 0; o1
< N_MAP
; o1
++)
2144 if (times_per_pair
[p
][p1
][i
][o1
])
2146 printf("%13s %s %-9s -> %s %s\n",
2147 comma (times_per_pair
[p
][p1
][i
][o1
]),
2148 op_cache_string(p1
),
2150 op_cache_string(o1
),
2155 printf("%13s memory stalls\n", comma (memory_stalls
));
2156 printf("%13s register stalls\n", comma (register_stalls
));
2157 printf("%13s branches taken (non-return)\n", comma (branch_stalls
));
2158 printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls
));
2159 printf("%13s fast returns\n", comma (fast_returns
));