diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c index 59579468da..5f16a69478 100644 --- a/sim/aarch64/simulator.c +++ b/sim/aarch64/simulator.c @@ -1,6 +1,6 @@ /* simulator.c -- Interface for the AArch64 simulator. - Copyright (C) 2015 Free Software Foundation, Inc. + Copyright (C) 2015-2020 Free Software Foundation, Inc. Contributed by Red Hat. @@ -24,13 +24,10 @@ #include #include #include -#include #include #include #include -#include "dis-asm.h" - #include "simulator.h" #include "cpustate.h" #include "memory.h" @@ -38,23 +35,21 @@ #define NO_SP 0 #define SP_OK 1 -bfd_boolean disas = FALSE; - #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag)) -#define IS_SET(_X) ( TST (( _X ))) -#define IS_CLEAR(_X) (!TST (( _X ))) +#define IS_SET(_X) (TST (( _X )) ? 1 : 0) +#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1) + +/* Space saver macro. */ +#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW)) #define HALT_UNALLOC \ do \ { \ - if (TRACE_INSN_P (cpu)) \ - { \ - aarch64_print_insn (CPU_STATE (cpu), aarch64_get_PC (cpu)); \ - TRACE_INSN (cpu, \ - "Unallocated instruction detected at sim line %d,"\ - " exe addr %" PRIx64, \ - __LINE__, aarch64_get_PC (cpu)); \ - } \ + TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \ + TRACE_INSN (cpu, \ + "Unallocated instruction detected at sim line %d," \ + " exe addr %" PRIx64, \ + __LINE__, aarch64_get_PC (cpu)); \ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\ sim_stopped, SIM_SIGILL); \ } \ @@ -63,14 +58,14 @@ bfd_boolean disas = FALSE; #define HALT_NYI \ do \ { \ - if (TRACE_INSN_P (cpu)) \ - { \ - aarch64_print_insn (CPU_STATE (cpu), aarch64_get_PC (cpu)); \ - TRACE_INSN (cpu, \ - "Unimplemented instruction detected at sim line %d,"\ - " exe addr %" PRIx64, \ - __LINE__, aarch64_get_PC (cpu)); \ - } \ + TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \ + TRACE_INSN (cpu, \ + "Unimplemented instruction detected at sim line %d," \ + " exe addr %" PRIx64, \ + __LINE__, aarch64_get_PC (cpu)); \ + if (! TRACE_ANY_P (cpu)) \ + sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \ + aarch64_get_instr (cpu)); \ sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\ sim_stopped, SIM_SIGABRT); \ } \ @@ -79,19 +74,11 @@ #define NYI_assert (HI, LO, EXPECTED) \ do \ { \ - if (uimm (aarch64_get_instr (cpu), (HI), (LO)) != (EXPECTED)) \ + if (INSTR ((HI), (LO)) != (EXPECTED)) \ HALT_NYI; \ } \ while (0) -#define HALT_UNREACHABLE \ - do \ - { \ - TRACE_EVENTS (cpu, "ISE: unreachable code point"); \ - sim_engine_abort (NULL, cpu, aarch64_get_PC (cpu), "Internal Error"); \ - } \ - while (0) - /* Helper functions used by expandLogicalImmediate. */ /* for i = 1, ... N result = 1 other bits are zero */ @@ -190,7 +177,7 @@ dexNotify (sim_cpu *cpu) { /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry 2 ==> exit Java, 3 ==> start next bytecode.
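A note on the new INSTR space-saver macro introduced above, since every operand decode that follows leans on it: INSTR (HIGH, LOW) expands to uimm (aarch64_get_instr (cpu), HIGH, LOW), an inclusive bit-range extract from the current instruction word. The helper below is a minimal illustrative stand-in for that extraction, not the simulator's own uimm:

#include <stdint.h>

/* Extract bits HI..LO of VAL (inclusive, HI >= LO), right-justified.  */
static uint32_t
extract_field (uint32_t val, unsigned hi, unsigned lo)
{
  unsigned width = hi - lo + 1;
  uint32_t mask = width < 32 ? (1u << width) - 1 : ~0u;
  return (val >> lo) & mask;
}

With that reading, the INSTR (9, 5), INSTR (4, 0) and INSTR (20, 16) calls used throughout the patch pick out the Rn, Rt/Rd and Rm register fields respectively.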
*/ - uint32_t type = uimm (aarch64_get_instr (cpu), 14, 0); + uint32_t type = INSTR (14, 0); TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type); @@ -245,7 +232,7 @@ dexPseudo (sim_cpu *cpu) sim_stopped, SIM_SIGTRAP); } - dispatch = uimm (aarch64_get_instr (cpu), 31, 15); + dispatch = INSTR (31, 15); /* We do not handle callouts at the moment. */ if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR) @@ -273,9 +260,10 @@ dexPseudo (sim_cpu *cpu) static void ldur32 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -285,9 +273,10 @@ ldur32 (sim_cpu *cpu, int32_t offset) static void ldur64 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -297,9 +286,10 @@ ldur64 (sim_cpu *cpu, int32_t offset) static void ldurb32 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -309,9 +299,10 @@ ldurb32 (sim_cpu *cpu, int32_t offset) static void ldursb32 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -321,9 +312,10 @@ ldursb32 (sim_cpu *cpu, int32_t offset) static void ldursb64 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -333,9 +325,10 @@ ldursb64 (sim_cpu *cpu, int32_t offset) static void ldurh32 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -345,9 +338,10 @@ ldurh32 (sim_cpu *cpu, int32_t offset) static void ldursh32 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) 
+ offset)); @@ -357,9 +351,10 @@ ldursh32 (sim_cpu *cpu, int32_t offset) static void ldursh64 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -369,9 +364,10 @@ ldursh64 (sim_cpu *cpu, int32_t offset) static void ldursw (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); @@ -384,9 +380,10 @@ ldursw (sim_cpu *cpu, int32_t offset) static void stur32 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, aarch64_get_reg_u32 (cpu, rd, NO_SP)); @@ -396,9 +393,10 @@ stur32 (sim_cpu *cpu, int32_t offset) static void stur64 (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, aarch64_get_reg_u64 (cpu, rd, NO_SP)); @@ -408,9 +406,10 @@ stur64 (sim_cpu *cpu, int32_t offset) static void sturb (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u8 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, aarch64_get_reg_u8 (cpu, rd, NO_SP)); @@ -420,9 +419,10 @@ sturb (sim_cpu *cpu, int32_t offset) static void sturh (sim_cpu *cpu, int32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, aarch64_get_reg_u16 (cpu, rd, NO_SP)); @@ -436,8 +436,9 @@ sturh (sim_cpu *cpu, int32_t offset) static void ldr32_pcrel (sim_cpu *cpu, int32_t offset) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u32 (cpu, aarch64_get_PC (cpu) + offset * 4)); @@ -447,8 +448,9 @@ ldr32_pcrel (sim_cpu *cpu, int32_t offset) static void ldr_pcrel (sim_cpu *cpu, int32_t offset) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u64 (cpu, aarch64_get_PC (cpu) + offset * 4)); @@ -458,8 +460,9 @@ ldr_pcrel (sim_cpu *cpu, int32_t offset) static void ldrsw_pcrel (sim_cpu *cpu, int32_t offset) { - unsigned rd = uimm 
(aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_s32 (cpu, aarch64_get_PC (cpu) + offset * 4)); @@ -469,32 +472,35 @@ ldrsw_pcrel (sim_cpu *cpu, int32_t offset) static void fldrs_pcrel (sim_cpu *cpu, int32_t offset) { - unsigned int rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rd = INSTR (4, 0); - aarch64_set_FP_float (cpu, rd, - aarch64_get_mem_float - (cpu, aarch64_get_PC (cpu) + offset * 4)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u32 (cpu, rd, 0, + aarch64_get_mem_u32 + (cpu, aarch64_get_PC (cpu) + offset * 4)); } /* double pc-relative load */ static void fldrd_pcrel (sim_cpu *cpu, int32_t offset) { - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int st = INSTR (4, 0); - aarch64_set_FP_double (cpu, st, - aarch64_get_mem_double - (cpu, aarch64_get_PC (cpu) + offset * 4)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u64 (cpu, st, 0, + aarch64_get_mem_u64 + (cpu, aarch64_get_PC (cpu) + offset * 4)); } /* long double pc-relative load. */ static void fldrq_pcrel (sim_cpu *cpu, int32_t offset) { - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int st = INSTR (4, 0); uint64_t addr = aarch64_get_PC (cpu) + offset * 4; FRegister a; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_get_mem_long_double (cpu, addr, & a); aarch64_set_FP_long_double (cpu, st, a); } @@ -508,7 +514,7 @@ fldrq_pcrel (sim_cpu *cpu, int32_t offset) /* This can be used to optionally scale a register derived offset by applying the requisite shift as indicated by the Scaling - argument. the second argument is either Byte, Short, Word + argument. The second argument is either Byte, Short, Word or Long. The third argument is either Scaled or Unscaled. N.B. when _Scaling is Scaled the shift gets ANDed with all 1s while when it is Unscaled it gets ANDed with 0. */ @@ -549,14 +555,15 @@ extend (uint32_t value, Extension extension) static void fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) address += offset; - aarch64_set_FP_float (cpu, st, aarch64_get_mem_float (cpu, address)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address)); if (wb == Post) address += offset; @@ -564,17 +571,65 @@ fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) aarch64_set_reg_u64 (cpu, rn, SP_OK, address); } +/* Load 8 bit with unsigned 12 bit offset. */ +static void +fldrb_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rd = INSTR (4, 0); + unsigned rn = INSTR (9, 5); + uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr)); +} + +/* Load 16 bit scaled unsigned 12 bit. */ +static void +fldrh_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rd = INSTR (4, 0); + unsigned rn = INSTR (9, 5); + uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr)); +} + /* Load 32 bit scaled unsigned 12 bit. 
*/ static void fldrs_abs (sim_cpu *cpu, uint32_t offset) { - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rd = INSTR (4, 0); + unsigned rn = INSTR (9, 5); + uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr)); +} + +/* Load 64 bit scaled unsigned 12 bit. */ +static void +fldrd_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rd = INSTR (4, 0); + unsigned rn = INSTR (9, 5); + uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr)); +} + +/* Load 128 bit scaled unsigned 12 bit. */ +static void +fldrq_abs (sim_cpu *cpu, uint32_t offset) +{ + unsigned rd = INSTR (4, 0); + unsigned rn = INSTR (9, 5); + uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); - aarch64_set_FP_float (cpu, st, - aarch64_get_mem_float - (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) - + SCALE (offset, 32))); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr)); + aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8)); } /* Load 32 bit scaled or unscaled zero- or sign-extended @@ -582,30 +637,31 @@ fldrs_abs (sim_cpu *cpu, uint32_t offset) static void fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 32, scaling); - aarch64_set_FP_float (cpu, st, - aarch64_get_mem_float - (cpu, address + displacement)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 + (cpu, address + displacement)); } /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */ static void fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) address += offset; - aarch64_set_FP_double (cpu, st, aarch64_get_mem_double (cpu, address)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address)); if (wb == Post) address += offset; @@ -614,22 +670,11 @@ fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) aarch64_set_reg_u64 (cpu, rn, SP_OK, address); } -/* Load 64 bit scaled unsigned 12 bit. */ -static void -fldrd_abs (sim_cpu *cpu, uint32_t offset) -{ - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64); - - aarch64_set_FP_double (cpu, st, aarch64_get_mem_double (cpu, address)); -} - /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. 
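One pattern worth calling out in the rewritten floating-point loads above: where the old code went through host FP types (aarch64_get_mem_float feeding aarch64_set_FP_float), the new code copies the raw 32-, 64- or 128-bit pattern into lane 0 (and lane 1 for the Q form) of the vector register. Copying bits rather than typed values preserves the exact IEEE encoding, NaN payloads included, which a round trip through a host float need not. A sketch of the bit-copy idea, with mem standing in for simulated memory (names are illustrative, not the sim's API):

#include <stdint.h>
#include <string.h>

/* Fetch a 32-bit FP value as its raw bit pattern; no host FP
   operation happens, so nothing is renormalised or quieted.  */
static uint32_t
load_fp32_bits (const uint8_t *mem)
{
  uint32_t bits;
  memcpy (&bits, mem, sizeof bits);
  return bits;
}

/* Reinterpret the bits only when a caller actually wants a float.  */
static float
bits_to_float (uint32_t bits)
{
  float f;
  memcpy (&f, &bits, sizeof f);
  return f;
}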
*/ static void fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rm = INSTR (20, 16); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 64, scaling); @@ -641,13 +686,14 @@ static void fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { FRegister a; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_get_mem_long_double (cpu, address, & a); aarch64_set_FP_long_double (cpu, st, a); @@ -658,24 +704,11 @@ fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) aarch64_set_reg_u64 (cpu, rn, SP_OK, address); } -/* Load 128 bit scaled unsigned 12 bit. */ -static void -fldrq_abs (sim_cpu *cpu, uint32_t offset) -{ - FRegister a; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); - - aarch64_get_mem_long_double (cpu, address, & a); - aarch64_set_FP_long_double (cpu, st, a); -} - /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */ static void fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rm = INSTR (20, 16); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 128, scaling); @@ -710,9 +743,10 @@ fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldr32_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) @@ -723,8 +757,8 @@ ldr32_abs (sim_cpu *cpu, uint32_t offset) static void ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -735,6 +769,7 @@ ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address)); if (wb == Post) @@ -749,15 +784,16 @@ ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 32, scaling); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address + displacement)); } @@ -766,9 +802,10 @@ ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldr_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) @@ -779,8 +816,8 @@ ldr_abs (sim_cpu *cpu, uint32_t offset) static void ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -791,6 +828,7 @@ ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address)); if (wb == Post) @@ -805,15 +843,16 @@ ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 64, scaling); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address + displacement)); } @@ -822,9 +861,10 @@ ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldrb32_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be there is no scaling required for a byte load. */ aarch64_set_reg_u64 (cpu, rt, NO_SP, @@ -836,8 +876,8 @@ ldrb32_abs (sim_cpu *cpu, uint32_t offset) static void ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -848,6 +888,7 @@ ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address)); if (wb == Post) @@ -862,15 +903,16 @@ ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* There is no scaling required for a byte load. 
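The *_scale_ext handlers above all compute the same effective address: take the low 32 bits of Rm, zero- or sign-extend them per the instruction's extension option, optionally shift left by log2 of the transfer size, and add the result to the Rn base. A compact sketch of that recipe, under the assumption (stated in the SCALE/OPT_SCALE comment earlier in the file) that Unscaled masks the shift down to zero; the helper and parameter names here are mine:

#include <stdint.h>

/* EA = base + (extend (Rm.lo32) << (scaled ? log2_size : 0)).  */
static uint64_t
ea_scale_ext (uint64_t base, uint32_t rm_lo,
              int sign_extend, int scaled, unsigned log2_size)
{
  int64_t extended = sign_extend
    ? (int64_t) (int32_t) rm_lo   /* SXTW-style: sign-extend 32 -> 64 */
    : (int64_t) rm_lo;            /* UXTW-style: zero-extend 32 -> 64 */
  return base + ((uint64_t) extended << (scaled ? log2_size : 0));
}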
*/ aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address + displacement)); @@ -881,9 +923,10 @@ ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; + int64_t val; if (rn == rt && wb != NoWriteBack) HALT_UNALLOC; @@ -893,7 +936,9 @@ ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, address)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + val = aarch64_get_mem_s8 (cpu, address); + aarch64_set_reg_s64 (cpu, rt, NO_SP, val); if (wb == Post) address += offset; @@ -914,16 +959,17 @@ ldrsb_abs (sim_cpu *cpu, uint32_t offset) static void ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* There is no scaling required for a byte load. */ - aarch64_set_reg_u64 (cpu, rt, NO_SP, + aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8 (cpu, address + displacement)); } @@ -931,13 +977,15 @@ ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldrh32_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + uint32_t val; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ - aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 - (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) - + SCALE (offset, 16))); + val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16)); + aarch64_set_reg_u32 (cpu, rt, NO_SP, val); } /* 32 bit load zero-extended short unscaled signed 9 bit @@ -945,8 +993,8 @@ ldrh32_abs (sim_cpu *cpu, uint32_t offset) static void ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -957,7 +1005,8 @@ ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address)); if (wb == Post) address += offset; @@ -971,16 +1020,17 @@ ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 16, scaling); - aarch64_set_reg_u64 (cpu, rt, NO_SP, + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address + displacement)); } @@ -988,14 +1038,15 @@ ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldrsh32_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + int32_t val; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ - aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s16 - (cpu, - aarch64_get_reg_u64 (cpu, rn, SP_OK) - + SCALE (offset, 16))); + val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16)); + aarch64_set_reg_s32 (cpu, rt, NO_SP, val); } /* 32 bit load sign-extended short unscaled signed 9 bit @@ -1003,8 +1054,8 @@ ldrsh32_abs (sim_cpu *cpu, uint32_t offset) static void ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -1015,8 +1066,9 @@ ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_reg_u64 (cpu, rt, NO_SP, - (uint32_t) aarch64_get_mem_s16 (cpu, address)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_reg_s32 (cpu, rt, NO_SP, + (int32_t) aarch64_get_mem_s16 (cpu, address)); if (wb == Post) address += offset; @@ -1030,17 +1082,18 @@ ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 16, scaling); - aarch64_set_reg_u64 (cpu, rt, NO_SP, - (uint32_t) aarch64_get_mem_s16 + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_reg_s32 (cpu, rt, NO_SP, + (int32_t) aarch64_get_mem_s16 (cpu, address + displacement)); } @@ -1048,13 +1101,15 @@ ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void ldrsh_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + int64_t val; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ - aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 - (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) - + SCALE (offset, 16))); + val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 16)); + aarch64_set_reg_s64 (cpu, rt, NO_SP, val); } /* 64 bit load sign-extended short unscaled signed 9 bit @@ -1062,19 +1117,22 @@ ldrsh_abs (sim_cpu *cpu, uint32_t offset) static void ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; + int64_t val; if (rn == rt && wb != NoWriteBack) HALT_UNALLOC; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) address += offset; - aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_s16 (cpu, address)); + val = aarch64_get_mem_s16 (cpu, address); + aarch64_set_reg_s64 (cpu, rt, NO_SP, val); if (wb == Post) address += offset; @@ -1088,30 +1146,35 @@ ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 16, scaling); + int64_t val; - aarch64_set_reg_u64 (cpu, rt, NO_SP, - aarch64_get_mem_s16 (cpu, address + displacement)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + val = aarch64_get_mem_s16 (cpu, address + displacement); + aarch64_set_reg_s64 (cpu, rt, NO_SP, val); } /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */ static void ldrsw_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + int64_t val; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + + SCALE (offset, 32)); /* The target register may not be SP but the source may be. 
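All of the sign-extending loads above get the same treatment: the old code handed aarch64_get_mem_s16's result straight to aarch64_set_reg_u64, while the new code stages it in a signed local (int32_t or int64_t) and writes it with aarch64_set_reg_s32 or aarch64_set_reg_s64, so the widening is spelled out in the types rather than left to implicit conversions. A tiny illustration of why the signedness of the intermediate matters (values are made up):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int16_t half = -2;               /* stored as 0xfffe */
  int64_t sext = half;             /* sign-extends: 0xfffffffffffffffe */
  uint64_t zext = (uint16_t) half; /* zero-extends: 0x000000000000fffe */

  printf ("sext=%016llx zext=%016llx\n",
          (unsigned long long) sext, (unsigned long long) zext);
  return 0;
}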
*/ - return aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 - (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) - + SCALE (offset, 32))); + return aarch64_set_reg_s64 (cpu, rt, NO_SP, val); } /* 64 bit load sign-extended 32 bit unscaled signed 9 bit @@ -1119,8 +1182,8 @@ ldrsw_abs (sim_cpu *cpu, uint32_t offset) static void ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -1131,6 +1194,7 @@ ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address)); if (wb == Post) @@ -1145,15 +1209,16 @@ ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 32, scaling); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address + displacement)); } @@ -1165,9 +1230,10 @@ ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void str32_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32)), @@ -1178,8 +1244,8 @@ str32_abs (sim_cpu *cpu, uint32_t offset) static void str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -1189,6 +1255,7 @@ str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP)); if (wb == Post) @@ -1203,14 +1270,15 @@ str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 32, scaling); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u32 (cpu, address + displacement, aarch64_get_reg_u64 (cpu, rt, NO_SP)); } @@ -1219,9 +1287,10 @@ str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void str_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64), @@ -1232,8 +1301,8 @@ str_abs (sim_cpu *cpu, uint32_t offset) static void str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -1244,6 +1313,7 @@ str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP)); if (wb == Post) @@ -1258,9 +1328,9 @@ str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); @@ -1268,6 +1338,7 @@ str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) extension); uint64_t displacement = OPT_SCALE (extended, 64, scaling); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u64 (cpu, address + displacement, aarch64_get_reg_u64 (cpu, rt, NO_SP)); } @@ -1276,9 +1347,10 @@ str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void strb_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt 
= uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. There is no scaling required for a byte load. */ aarch64_set_mem_u8 (cpu, @@ -1290,8 +1362,8 @@ strb_abs (sim_cpu *cpu, uint32_t offset) static void strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -1302,6 +1374,7 @@ strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP)); if (wb == Post) @@ -1316,15 +1389,16 @@ strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* There is no scaling required for a byte load. */ aarch64_set_mem_u8 (cpu, address + displacement, aarch64_get_reg_u8 (cpu, rt, NO_SP)); @@ -1334,9 +1408,10 @@ strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void strh_abs (sim_cpu *cpu, uint32_t offset) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* The target register may not be SP but the source may be. 
*/ aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16), @@ -1347,8 +1422,8 @@ strh_abs (sim_cpu *cpu, uint32_t offset) static void strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address; if (rn == rt && wb != NoWriteBack) @@ -1359,6 +1434,7 @@ strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP)); if (wb == Post) @@ -1373,15 +1449,16 @@ strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR */ uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 16, scaling); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_mem_u16 (cpu, address + displacement, aarch64_get_reg_u16 (cpu, rt, NO_SP)); } @@ -1397,7 +1474,7 @@ prfm_abs (sim_cpu *cpu, uint32_t offset) 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM, 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM, ow ==> UNALLOC - PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0); + PrfOp prfop = prfop (instr, 4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64). */ @@ -1416,7 +1493,7 @@ prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM, ow ==> UNALLOC rn may reference SP, rm may only reference ZR - PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0); + PrfOp prfop = prfop (instr, 4, 0); uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); @@ -1437,7 +1514,7 @@ prfm_pcrel (sim_cpu *cpu, int32_t offset) 10010 ==> PSTL2KEEP, 10001 ==> PSTL2STRM, 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM, ow ==> UNALLOC - PrfOp prfop = prfop (aarch64_get_instr (cpu), 4, 0); + PrfOp prfop = prfop (instr, 4, 0); uint64_t address = aarch64_get_PC (cpu) + offset. */ /* TODO : implement this */ @@ -1448,13 +1525,14 @@ prfm_pcrel (sim_cpu *cpu, int32_t offset) static void ldxr (sim_cpu *cpu) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); - int size = uimm (aarch64_get_instr (cpu), 31, 30); - /* int ordered = uimm (aarch64_get_instr (cpu), 15, 15); */ - /* int exclusive = ! uimm (aarch64_get_instr (cpu), 23, 23); */ + int size = INSTR (31, 30); + /* int ordered = INSTR (15, 15); */ + /* int exclusive = ! 
INSTR (23, 23); */ + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); switch (size) { case 0: @@ -1469,19 +1547,17 @@ ldxr (sim_cpu *cpu) case 3: aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address)); break; - default: - HALT_UNALLOC; } } static void stxr (sim_cpu *cpu) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rs = uimm (aarch64_get_instr (cpu), 20, 16); + unsigned rn = INSTR (9, 5); + unsigned rt = INSTR (4, 0); + unsigned rs = INSTR (20, 16); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); - int size = uimm (aarch64_get_instr (cpu), 31, 30); + int size = INSTR (31, 30); uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP); switch (size) @@ -1490,9 +1566,9 @@ stxr (sim_cpu *cpu) case 1: aarch64_set_mem_u16 (cpu, address, data); break; case 2: aarch64_set_mem_u32 (cpu, address, data); break; case 3: aarch64_set_mem_u64 (cpu, address, data); break; - default: HALT_UNALLOC; } + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */ } @@ -1508,9 +1584,8 @@ dexLoadLiteral (sim_cpu *cpu) instr[26] ==> V : 0 ==> GReg, 1 ==> FReg instr[23, 5] == simm19 */ - /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ - uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 1) - | uimm (aarch64_get_instr (cpu), 26, 26)); + /* unsigned rt = INSTR (4, 0); */ + uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26); int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5); switch (dispatch) @@ -1540,9 +1615,10 @@ dexLoadLiteral (sim_cpu *cpu) static void add32 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm); } @@ -1551,9 +1627,10 @@ add32 (sim_cpu *cpu, uint32_t aimm) static void add64 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm); } @@ -1573,7 +1650,7 @@ set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2) if (result & (1 << 31)) flags |= N; - if (uresult != result) + if (uresult != (uint32_t)result) flags |= C; if (sresult != result) @@ -1582,61 +1659,40 @@ set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2) aarch64_set_CPSR (cpu, flags); } +#define NEG(a) (((a) & signbit) == signbit) +#define POS(a) (((a) & signbit) == 0) + static void set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2) { - int64_t sval1 = value1; - int64_t sval2 = value2; - uint64_t result = value1 + value2; - int64_t sresult = sval1 + sval2; - uint32_t flags = 0; + uint64_t result = value1 + value2; + uint32_t flags = 0; + uint64_t signbit = 1ULL << 63; if (result == 0) flags |= Z; - if (result & (1ULL << 63)) + if (NEG (result)) flags |= N; - if (sval1 < 0) - { - if (sval2 < 0) - { - /* Negative plus a negative. Overflow happens if - the result is greater than either of the operands. */ - if (sresult > sval1 || sresult > sval2) - flags |= V; - } - /* else Negative plus a positive. Overflow cannot happen. 
*/ - } - else /* value1 is +ve. */ - { - if (sval2 < 0) - { - /* Overflow can only occur if we computed "0 - MININT". */ - if (sval1 == 0 && sval2 == (1LL << 63)) - flags |= V; - } - else - { - /* Postive plus positive - overflow has happened if the - result is smaller than either of the operands. */ - if (result < value1 || result < value2) - flags |= V | C; - } - } + if ( (NEG (value1) && NEG (value2)) + || (NEG (value1) && POS (result)) + || (NEG (value2) && POS (result))) + flags |= C; + + if ( (NEG (value1) && NEG (value2) && POS (result)) + || (POS (value1) && POS (value2) && NEG (result))) + flags |= V; aarch64_set_CPSR (cpu, flags); } -#define NEG(a) (((a) & signbit) == signbit) -#define POS(a) (((a) & signbit) == 0) - static void set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2) { uint32_t result = value1 - value2; uint32_t flags = 0; - uint32_t signbit = 1ULL << 31; + uint32_t signbit = 1U << 31; if (result == 0) flags |= Z; @@ -1721,11 +1777,12 @@ set_flags_for_binop64 (sim_cpu *cpu, uint64_t result) static void adds32 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); /* TODO : do we need to worry about signs here? */ int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm); set_flags_for_add32 (cpu, value1, aimm); } @@ -1734,11 +1791,12 @@ adds32 (sim_cpu *cpu, uint32_t aimm) static void adds64 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); uint64_t value2 = aimm; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); set_flags_for_add64 (cpu, value1, value2); } @@ -1747,9 +1805,10 @@ adds64 (sim_cpu *cpu, uint32_t aimm) static void sub32 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm); } @@ -1758,9 +1817,10 @@ sub32 (sim_cpu *cpu, uint32_t aimm) static void sub64 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm); } @@ -1769,11 +1829,12 @@ sub64 (sim_cpu *cpu, uint32_t aimm) static void subs32 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); uint32_t value2 = aimm; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); set_flags_for_sub32 (cpu, value1, value2); } @@ -1782,11 +1843,12 @@ subs32 (sim_cpu *cpu, uint32_t aimm) static void subs64 (sim_cpu *cpu, uint32_t aimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - 
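The rewritten set_flags_for_add64 above trades the old sign-based case analysis for direct sign-bit identities: C is set exactly when the unsigned add carries out of bit 63, and V exactly when two like-signed operands produce an oppositely-signed sum. (Note from the removed lines that the old version could only ever set C on the positive-plus-positive path, so this is a behavioural fix as well as a simplification.) A small self-check of the new identities against wide arithmetic; it assumes a compiler that provides __int128, such as GCC or Clang:

#include <stdint.h>
#include <assert.h>

#define NEG64(a) (((a) >> 63) & 1)   /* mirrors NEG with signbit = 1ULL << 63 */
#define POS64(a) (!NEG64 (a))

static void
check_add64_flags (uint64_t v1, uint64_t v2)
{
  uint64_t r = v1 + v2;
  int carry = (NEG64 (v1) && NEG64 (v2))
              || (NEG64 (v1) && POS64 (r))
              || (NEG64 (v2) && POS64 (r));
  int overflow = (NEG64 (v1) && NEG64 (v2) && POS64 (r))
                 || (POS64 (v1) && POS64 (v2) && NEG64 (r));

  /* C is the carry out of bit 63 of the unsigned sum...  */
  assert (carry == (int) (((unsigned __int128) v1 + v2) >> 64));

  /* ...and V says the exact signed sum does not fit in int64_t.  */
  __int128 exact = (__int128) (int64_t) v1 + (int64_t) v2;
  assert (overflow == (exact != (int64_t) exact));
}

int
main (void)
{
  check_add64_flags (UINT64_MAX, 1);          /* carry, no overflow */
  check_add64_flags (INT64_MAX, 1);           /* overflow, no carry */
  check_add64_flags (1ULL << 63, 1ULL << 63); /* carry and overflow */
  return 0;
}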
unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); uint32_t value2 = aimm; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); set_flags_for_sub64 (cpu, value1, value2); } @@ -1853,10 +1915,11 @@ shifted64 (uint64_t value, Shift shift, uint32_t count) static void add32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), @@ -1867,10 +1930,11 @@ add32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void add64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), @@ -1881,14 +1945,15 @@ add64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); set_flags_for_add32 (cpu, value1, value2); } @@ -1897,14 +1962,15 @@ adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); set_flags_for_add64 (cpu, value1, value2); } @@ -1913,10 +1979,11 @@ adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), @@ -1927,10 +1994,11 @@ sub32_shift (sim_cpu 
*cpu, Shift shift, uint32_t count) static void sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), @@ -1941,14 +2009,15 @@ sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); set_flags_for_sub32 (cpu, value1, value2); } @@ -1957,14 +2026,15 @@ subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); set_flags_for_sub64 (cpu, value1, value2); } @@ -2021,10 +2091,11 @@ extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension) static void add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, SP_OK) + (extreg32 (cpu, rm, extension) << shift)); @@ -2035,10 +2106,11 @@ add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, SP_OK) + (extreg64 (cpu, rm, extension) << shift)); @@ -2048,13 +2120,14 @@ add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t 
value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); uint32_t value2 = extreg32 (cpu, rm, extension) << shift; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); set_flags_for_add32 (cpu, value1, value2); } @@ -2064,13 +2137,14 @@ adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); uint64_t value2 = extreg64 (cpu, rm, extension) << shift; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); set_flags_for_add64 (cpu, value1, value2); } @@ -2079,10 +2153,11 @@ adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, SP_OK) - (extreg32 (cpu, rm, extension) << shift)); @@ -2093,10 +2168,11 @@ sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, SP_OK) - (extreg64 (cpu, rm, extension) << shift)); @@ -2106,13 +2182,14 @@ sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); uint32_t value2 = extreg32 (cpu, rm, extension) << shift; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); set_flags_for_sub32 (cpu, value1, value2); } @@ -2122,13 +2199,14 @@ subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) static void subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); uint64_t value2 = extreg64 (cpu, rm, extension) << shift; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); set_flags_for_sub64 (cpu, value1, value2); } @@ -2146,9 +2224,9 @@ 
dexAddSubtractImmediate (sim_cpu *cpu) instr[4,0] = Rd */ /* N.B. the shift is applied at decode before calling the add/sub routine. */ - uint32_t shift = uimm (aarch64_get_instr (cpu), 23, 22); - uint32_t imm = uimm (aarch64_get_instr (cpu), 21, 10); - uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + uint32_t shift = INSTR (23, 22); + uint32_t imm = INSTR (21, 10); + uint32_t dispatch = INSTR (31, 29); NYI_assert (28, 24, 0x11); @@ -2168,8 +2246,6 @@ dexAddSubtractImmediate (sim_cpu *cpu) case 5: adds64 (cpu, imm); break; case 6: sub64 (cpu, imm); break; case 7: subs64 (cpu, imm); break; - default: - HALT_UNALLOC; } } @@ -2186,25 +2262,24 @@ dexAddSubtractShiftedRegister (sim_cpu *cpu) instr[9,5] = Rn instr[4,0] = Rd */ - uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); - /* 32 bit operations must have count[5] = 0 - or else we have an UNALLOC. */ - uint32_t count = uimm (aarch64_get_instr (cpu), 15, 10); - /* Shift encoded as ROR is unallocated. */ - Shift shiftType = shift (aarch64_get_instr (cpu), 22); - /* Dispatch on size:op i.e aarch64_get_instr (cpu)[31,29]. */ - uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + uint32_t size = INSTR (31, 31); + uint32_t count = INSTR (15, 10); + Shift shiftType = INSTR (23, 22); NYI_assert (28, 24, 0x0B); NYI_assert (21, 21, 0); + /* Shift encoded as ROR is unallocated. */ if (shiftType == ROR) HALT_UNALLOC; - if (!size && uimm (count, 5, 5)) + /* 32 bit operations must have count[5] = 0 + or else we have an UNALLOC. */ + if (size == 0 && uimm (count, 5, 5)) HALT_UNALLOC; - switch (dispatch) + /* Dispatch on size:op i.e instr [31,29]. */ + switch (INSTR (31, 29)) { case 0: add32_shift (cpu, shiftType, count); break; case 1: adds32_shift (cpu, shiftType, count); break; @@ -2214,8 +2289,6 @@ dexAddSubtractShiftedRegister (sim_cpu *cpu) case 5: adds64_shift (cpu, shiftType, count); break; case 6: sub64_shift (cpu, shiftType, count); break; case 7: subs64_shift (cpu, shiftType, count); break; - default: - HALT_UNALLOC; } } @@ -2237,10 +2310,8 @@ dexAddSubtractExtendedRegister (sim_cpu *cpu) instr[9,5] = Rn instr[4,0] = Rd */ - Extension extensionType = extension (aarch64_get_instr (cpu), 13); - uint32_t shift = uimm (aarch64_get_instr (cpu), 12, 10); - /* dispatch on size:op:set? i.e aarch64_get_instr (cpu)[31,29] */ - uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + Extension extensionType = INSTR (15, 13); + uint32_t shift = INSTR (12, 10); NYI_assert (28, 24, 0x0B); NYI_assert (21, 21, 1); @@ -2249,7 +2320,8 @@ dexAddSubtractExtendedRegister (sim_cpu *cpu) if (shift > 4) HALT_UNALLOC; - switch (dispatch) + /* Dispatch on size:op:set?. 
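The dispatch switches in these add/sub decoders all key on instr[31,29], which packs sf (operand size), op (add vs. subtract) and S (set flags) into one three-bit value. A minimal sketch of that mapping, using a hypothetical helper that is not part of the simulator:

/* Illustrative only: bit 2 = sf (64-bit), bit 1 = op (subtract),
   bit 0 = S (set flags), so case 0 is add32 ... case 7 is subs64.  */
struct addsub_form { int is64, is_sub, sets_flags; };

static struct addsub_form
decode_addsub (unsigned dispatch)   /* dispatch = instr[31,29].  */
{
  struct addsub_form f;
  f.is64       = (dispatch >> 2) & 1;
  f.is_sub     = (dispatch >> 1) & 1;
  f.sets_flags = dispatch & 1;
  return f;
}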
*/ + switch (INSTR (31, 29)) { case 0: add32_ext (cpu, extensionType, shift); break; case 1: adds32_ext (cpu, extensionType, shift); break; @@ -2259,7 +2331,6 @@ dexAddSubtractExtendedRegister (sim_cpu *cpu) case 5: adds64_ext (cpu, extensionType, shift); break; case 6: sub64_ext (cpu, extensionType, shift); break; case 7: subs64_ext (cpu, extensionType, shift); break; - default: HALT_UNALLOC; } } @@ -2272,10 +2343,11 @@ dexAddSubtractExtendedRegister (sim_cpu *cpu) static void adc32 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) + aarch64_get_reg_u32 (cpu, rm, NO_SP) @@ -2286,10 +2358,11 @@ adc32 (sim_cpu *cpu) static void adc64 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) + aarch64_get_reg_u64 (cpu, rm, NO_SP) @@ -2300,14 +2373,15 @@ adc64 (sim_cpu *cpu) static void adcs32 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); uint32_t carry = IS_SET (C); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); set_flags_for_add32 (cpu, value1, value2 + carry); } @@ -2316,14 +2390,15 @@ adcs32 (sim_cpu *cpu) static void adcs64 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); uint64_t carry = IS_SET (C); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); set_flags_for_add64 (cpu, value1, value2 + carry); } @@ -2332,10 +2407,11 @@ adcs64 (sim_cpu *cpu) static void sbc32 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. 
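The "ngc iff rn == 31" note works because register number 31 reads as the zero register under NO_SP, so SBC collapses into the NGC alias. Architecturally SBC computes Rn - Rm - NOT(C); the degenerate case, as a hypothetical helper not taken from the simulator:

/* Illustrative only: NGC Wd, Wm is SBC Wd, WZR, Wm.  */
static uint32_t
ngc32_value (uint32_t rm_val, uint32_t c_flag /* 0 or 1 */)
{
  return 0u - rm_val - (1u - c_flag);
}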
*/ + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) - aarch64_get_reg_u32 (cpu, rm, NO_SP) @@ -2346,10 +2422,11 @@ sbc32 (sim_cpu *cpu) static void sbc64 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) - aarch64_get_reg_u64 (cpu, rm, NO_SP) @@ -2360,15 +2437,16 @@ sbc64 (sim_cpu *cpu) static void sbcs32 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); uint32_t carry = IS_SET (C); uint32_t result = value1 - value2 + 1 - carry; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, result); set_flags_for_sub32 (cpu, value1, value2 + 1 - carry); } @@ -2377,15 +2455,16 @@ sbcs32 (sim_cpu *cpu) static void sbcs64 (sim_cpu *cpu) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); uint64_t carry = IS_SET (C); uint64_t result = value1 - value2 + 1 - carry; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, result); set_flags_for_sub64 (cpu, value1, value2 + 1 - carry); } @@ -2402,16 +2481,15 @@ dexAddSubtractWithCarry (sim_cpu *cpu) instr[9,5] = Rn instr[4,0] = Rd */ - uint32_t op2 = uimm (aarch64_get_instr (cpu), 15, 10); - /* Dispatch on size:op:set? i.e aarch64_get_instr (cpu)[31,29] */ - uint32_t dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + uint32_t op2 = INSTR (15, 10); NYI_assert (28, 21, 0xD0); if (op2 != 0) HALT_UNALLOC; - switch (dispatch) + /* Dispatch on size:op:set?. */ + switch (INSTR (31, 29)) { case 0: adc32 (cpu); break; case 1: adcs32 (cpu); break; @@ -2421,7 +2499,6 @@ dexAddSubtractWithCarry (sim_cpu *cpu) case 5: adcs64 (cpu); break; case 6: sbc64 (cpu); break; case 7: sbcs64 (cpu); break; - default: HALT_UNALLOC; } } @@ -2465,7 +2542,7 @@ static void CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ { /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit - instr[30] = compare with positive (0) or negative value (1) + instr[30] = compare with positive (1) or negative value (0) instr[29,21] = 1 1101 0010 instr[20,16] = Rm or const instr[15,12] = cond @@ -2482,19 +2559,20 @@ CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ NYI_assert (10, 10, 0); NYI_assert (4, 4, 0); - if (! testConditionCode (cpu, uimm (aarch64_get_instr (cpu), 15, 12))) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (! 
testConditionCode (cpu, INSTR (15, 12))) { - aarch64_set_CPSR (cpu, uimm (aarch64_get_instr (cpu), 3, 0)); + aarch64_set_CPSR (cpu, INSTR (3, 0)); return; } - negate = uimm (aarch64_get_instr (cpu), 30, 30) ? -1 : 1; - rm = uimm (aarch64_get_instr (cpu), 20, 16); - rn = uimm (aarch64_get_instr (cpu), 9, 5); + negate = INSTR (30, 30) ? 1 : -1; + rm = INSTR (20, 16); + rn = INSTR ( 9, 5); - if (uimm (aarch64_get_instr (cpu), 31, 31)) + if (INSTR (31, 31)) { - if (uimm (aarch64_get_instr (cpu), 11, 11)) + if (INSTR (11, 11)) set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), negate * (uint64_t) rm); else @@ -2503,7 +2581,7 @@ CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ } else { - if (uimm (aarch64_get_instr (cpu), 11, 11)) + if (INSTR (11, 11)) set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), negate * rm); else @@ -2525,58 +2603,166 @@ do_vec_MOV_whole_vector (sim_cpu *cpu) instr[9,5] = Vs instr[4,0] = Vd */ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); NYI_assert (29, 21, 0x075); NYI_assert (15, 10, 0x07); - if (uimm (aarch64_get_instr (cpu), 20, 16) != vs) + if (INSTR (20, 16) != vs) HALT_NYI; - if (uimm (aarch64_get_instr (cpu), 30, 30)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (30, 30)) aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1)); aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0)); } static void -do_vec_MOV_into_scalar (sim_cpu *cpu) +do_vec_SMOV_into_scalar (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = word(0)/long(1) + instr[29,21] = 00 1110 000 + instr[20,16] = element size and index + instr[15,10] = 00 0010 11 + instr[9,5] = V source + instr[4,0] = R dest */ + + unsigned vs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned imm5 = INSTR (20, 16); + unsigned full = INSTR (30, 30); + int size, index; + + NYI_assert (29, 21, 0x070); + NYI_assert (15, 10, 0x0B); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + + if (imm5 & 0x1) + { + size = 0; + index = (imm5 >> 1) & 0xF; + } + else if (imm5 & 0x2) + { + size = 1; + index = (imm5 >> 2) & 0x7; + } + else if (full && (imm5 & 0x4)) + { + size = 2; + index = (imm5 >> 3) & 0x3; + } + else + HALT_UNALLOC; + + switch (size) + { + case 0: + if (full) + aarch64_set_reg_s64 (cpu, rd, NO_SP, + aarch64_get_vec_s8 (cpu, vs, index)); + else + aarch64_set_reg_s32 (cpu, rd, NO_SP, + aarch64_get_vec_s8 (cpu, vs, index)); + break; + + case 1: + if (full) + aarch64_set_reg_s64 (cpu, rd, NO_SP, + aarch64_get_vec_s16 (cpu, vs, index)); + else + aarch64_set_reg_s32 (cpu, rd, NO_SP, + aarch64_get_vec_s16 (cpu, vs, index)); + break; + + case 2: + aarch64_set_reg_s64 (cpu, rd, NO_SP, + aarch64_get_vec_s32 (cpu, vs, index)); + break; + + default: + HALT_UNALLOC; + } +} + +static void +do_vec_UMOV_into_scalar (sim_cpu *cpu) { /* instr[31] = 0 instr[30] = word(0)/long(1) instr[29,21] = 00 1110 000 - instr[20,18] = element size and index - instr[17,10] = 00 0011 11 + instr[20,16] = element size and index + instr[15,10] = 00 0011 11 instr[9,5] = V source instr[4,0] = R dest */ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned imm5 = INSTR (20, 16); + unsigned full = INSTR (30, 30); + int size, index; NYI_assert (29, 21, 0x070); - NYI_assert (17, 10, 0x0F); + NYI_assert (15, 10, 0x0F); + + TRACE_DECODE 
(cpu, "emulated at line %d", __LINE__); + + if (!full) + { + if (imm5 & 0x1) + { + size = 0; + index = (imm5 >> 1) & 0xF; + } + else if (imm5 & 0x2) + { + size = 1; + index = (imm5 >> 2) & 0x7; + } + else if (imm5 & 0x4) + { + size = 2; + index = (imm5 >> 3) & 0x3; + } + else + HALT_UNALLOC; + } + else if (imm5 & 0x8) + { + size = 3; + index = (imm5 >> 4) & 0x1; + } + else + HALT_UNALLOC; - switch (uimm (aarch64_get_instr (cpu), 20, 18)) + switch (size) { - case 0x2: - aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0)); + case 0: + aarch64_set_reg_u32 (cpu, rd, NO_SP, + aarch64_get_vec_u8 (cpu, vs, index)); + break; + + case 1: + aarch64_set_reg_u32 (cpu, rd, NO_SP, + aarch64_get_vec_u16 (cpu, vs, index)); break; - case 0x6: - aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1)); + case 2: + aarch64_set_reg_u32 (cpu, rd, NO_SP, + aarch64_get_vec_u32 (cpu, vs, index)); break; - case 0x1: - case 0x3: - case 0x5: - case 0x7: - aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32 - (cpu, vs, uimm (aarch64_get_instr (cpu), 20, 19))); + case 3: + aarch64_set_reg_u64 (cpu, rd, NO_SP, + aarch64_get_vec_u64 (cpu, vs, index)); break; default: - HALT_NYI; + HALT_UNALLOC; } } @@ -2590,33 +2776,34 @@ do_vec_INS (sim_cpu *cpu) instr[4,0] = V dest */ int index; - unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); NYI_assert (31, 21, 0x270); NYI_assert (15, 10, 0x07); - if (uimm (aarch64_get_instr (cpu), 16, 16)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (16, 16)) { - index = uimm (aarch64_get_instr (cpu), 20, 17); + index = INSTR (20, 17); aarch64_set_vec_u8 (cpu, vd, index, aarch64_get_reg_u8 (cpu, rs, NO_SP)); } - else if (uimm (aarch64_get_instr (cpu), 17, 17)) + else if (INSTR (17, 17)) { - index = uimm (aarch64_get_instr (cpu), 20, 18); + index = INSTR (20, 18); aarch64_set_vec_u16 (cpu, vd, index, aarch64_get_reg_u16 (cpu, rs, NO_SP)); } - else if (uimm (aarch64_get_instr (cpu), 18, 18)) + else if (INSTR (18, 18)) { - index = uimm (aarch64_get_instr (cpu), 20, 19); + index = INSTR (20, 19); aarch64_set_vec_u32 (cpu, vd, index, aarch64_get_reg_u32 (cpu, rs, NO_SP)); } - else if (uimm (aarch64_get_instr (cpu), 19, 19)) + else if (INSTR (19, 19)) { - index = uimm (aarch64_get_instr (cpu), 20, 20); + index = INSTR (20, 20); aarch64_set_vec_u64 (cpu, vd, index, aarch64_get_reg_u64 (cpu, rs, NO_SP)); } @@ -2635,44 +2822,45 @@ do_vec_DUP_vector_into_vector (sim_cpu *cpu) instr[9,5] = V source instr[4,0] = V dest. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); int i, index; NYI_assert (29, 21, 0x070); NYI_assert (15, 10, 0x01); - if (uimm (aarch64_get_instr (cpu), 16, 16)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (16, 16)) { - index = uimm (aarch64_get_instr (cpu), 20, 17); + index = INSTR (20, 17); for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index)); } - else if (uimm (aarch64_get_instr (cpu), 17, 17)) + else if (INSTR (17, 17)) { - index = uimm (aarch64_get_instr (cpu), 20, 18); + index = INSTR (20, 18); for (i = 0; i < (full ? 
8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index)); } - else if (uimm (aarch64_get_instr (cpu), 18, 18)) + else if (INSTR (18, 18)) { - index = uimm (aarch64_get_instr (cpu), 20, 19); + index = INSTR (20, 19); for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index)); } else { - if (uimm (aarch64_get_instr (cpu), 19, 19) == 0) + if (INSTR (19, 19) == 0) HALT_UNALLOC; if (! full) HALT_UNALLOC; - index = uimm (aarch64_get_instr (cpu), 20, 20); + index = INSTR (20, 20); for (i = 0; i < 2; i++) aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index)); @@ -2692,16 +2880,17 @@ do_vec_TBL (sim_cpu *cpu) instr[9,5] = V start instr[4,0] = V dest */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - int len = uimm (aarch64_get_instr (cpu), 14, 13) + 1; - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + int len = INSTR (14, 13) + 1; + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 21, 0x070); NYI_assert (12, 10, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 16 : 8); i++) { unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i); @@ -2737,17 +2926,18 @@ do_vec_TRN (sim_cpu *cpu) instr[9,5] = V source instr[4,0] = V dest. */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - int second = uimm (aarch64_get_instr (cpu), 14, 14); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + int second = INSTR (14, 14); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 24, 0x0E); NYI_assert (13, 10, 0xA); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 8 : 4); i++) @@ -2793,9 +2983,6 @@ do_vec_TRN (sim_cpu *cpu) aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, second ? vn : vm, 1)); break; - - default: - HALT_UNALLOC; } } @@ -2813,14 +3000,15 @@ do_vec_DUP_scalar_into_vector (sim_cpu *cpu) instr[4,0] = V dest. */ unsigned i; - unsigned Vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned Rs = uimm (aarch64_get_instr (cpu), 9, 5); - int both = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned Vd = INSTR (4, 0); + unsigned Rs = INSTR (9, 5); + int both = INSTR (30, 30); NYI_assert (29, 20, 0x0E0); NYI_assert (15, 10, 0x03); - switch (uimm (aarch64_get_instr (cpu), 19, 16)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (19, 16)) { case 1: for (i = 0; i < (both ? 16 : 8); i++) @@ -2864,56 +3052,91 @@ do_vec_UZP (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd. 
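The do_vec_UZP rewrite below expresses UZP1/UZP2 as shifts and masks over the four 64-bit register halves; semantically it de-interleaves the even-numbered (UZP1) or odd-numbered (UZP2) elements of the Vn:Vm concatenation. The same operation as a plain per-element loop, byte case only, using a hypothetical helper:

/* Illustrative only: UZP over bytes; 'upper' = 1 selects UZP2.  */
static void
uzp_bytes (uint8_t *vd, const uint8_t *vn, const uint8_t *vm,
           int nelem, int upper)
{
  int i;
  for (i = 0; i < nelem; i++)
    {
      int src = 2 * i + upper;   /* Even or odd element of Vn:Vm.  */
      vd[i] = src < nelem ? vn[src] : vm[src - nelem];
    }
}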
*/ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - int upper = uimm (aarch64_get_instr (cpu), 14, 14); + int full = INSTR (30, 30); + int upper = INSTR (14, 14); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0); uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1); uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0); uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1); - uint64_t val1 = 0; - uint64_t val2 = 0; + uint64_t val1; + uint64_t val2; - uint64_t input1 = upper ? val_n1 : val_m1; - uint64_t input2 = upper ? val_n2 : val_m2; - unsigned i; + uint64_t input2 = full ? val_n2 : val_m1; NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 0); NYI_assert (15, 15, 0); NYI_assert (13, 10, 6); - switch (uimm (aarch64_get_instr (cpu), 23, 23)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: - for (i = 0; i < 8; i++) + val1 = (val_n1 >> (upper * 8)) & 0xFFULL; + val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL; + val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL; + val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL; + + val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL; + val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL; + val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL; + val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL; + + if (full) { - val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8)); - val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8)); + val2 = (val_m1 >> (upper * 8)) & 0xFFULL; + val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL; + val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL; + val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL; + + val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL; + val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL; + val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL; + val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL; } break; case 1: - for (i = 0; i < 4; i++) + val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL; + val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL; + + val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;; + val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL; + + if (full) { - val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16)); - val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16)); + val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL; + val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL; + + val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL; + val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL; } break; case 2: - val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL)); - val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL)); + val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF; + val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL; + + if (full) + { + val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF; + val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL; + } + break; case 3: - val1 = input1; - val2 = input2; - break; + if (! full) + HALT_UNALLOC; + + val1 = upper ? val_n2 : val_n1; + val2 = upper ? val_m2 : val_m1; + break; } aarch64_set_vec_u64 (cpu, vd, 0, val1); @@ -2936,12 +3159,12 @@ do_vec_ZIP (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd. 
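do_vec_ZIP below is the inverse transform: ZIP1 (upper = 0) interleaves the low halves of Vn and Vm, ZIP2 the high halves. As a loop, byte case, with a hypothetical helper name:

/* Illustrative only: ZIP over bytes; 'upper' = 1 selects ZIP2.  */
static void
zip_bytes (uint8_t *vd, const uint8_t *vn, const uint8_t *vm,
           int nelem, int upper)
{
  int i;
  int base = upper ? nelem / 2 : 0;
  for (i = 0; i < nelem / 2; i++)
    {
      vd[2 * i]     = vn[base + i];
      vd[2 * i + 1] = vm[base + i];
    }
}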
*/ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - int upper = uimm (aarch64_get_instr (cpu), 14, 14); + int full = INSTR (30, 30); + int upper = INSTR (14, 14); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0); uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1); @@ -2959,7 +3182,8 @@ do_vec_ZIP (sim_cpu *cpu) NYI_assert (15, 15, 0); NYI_assert (13, 10, 0xE); - switch (uimm (aarch64_get_instr (cpu), 23, 23)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 23)) { case 0: val1 = @@ -3111,22 +3335,22 @@ do_vec_MOV_immediate (sim_cpu *cpu) instr[9,5] = low 5-bits of uimm8 instr[4,0] = Vd. */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned val = uimm (aarch64_get_instr (cpu), 18, 16) << 5 - | uimm (aarch64_get_instr (cpu), 9, 5); + int full = INSTR (30, 30); + unsigned vd = INSTR (4, 0); + unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); unsigned i; NYI_assert (29, 19, 0x1E0); NYI_assert (11, 10, 1); - switch (uimm (aarch64_get_instr (cpu), 15, 12)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (15, 12)) { case 0x0: /* 32-bit, no shift. */ case 0x2: /* 32-bit, shift by 8. */ case 0x4: /* 32-bit, shift by 16. */ case 0x6: /* 32-bit, shift by 24. */ - val <<= (8 * uimm (aarch64_get_instr (cpu), 14, 13)); + val <<= (8 * INSTR (14, 13)); for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, val); break; @@ -3137,7 +3361,8 @@ do_vec_MOV_immediate (sim_cpu *cpu) case 0x8: /* 16-bit, no shift. */ for (i = 0; i < (full ? 8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, val); - /* Fall through. */ + break; + case 0xd: /* 32-bit, mask shift by 16. */ val <<= 8; val |= 0xFF; @@ -3179,22 +3404,22 @@ do_vec_MVNI (sim_cpu *cpu) instr[9,5] = low 5-bits of uimm8 instr[4,0] = Vd. */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned val = uimm (aarch64_get_instr (cpu), 18, 16) << 5 - | uimm (aarch64_get_instr (cpu), 9, 5); + int full = INSTR (30, 30); + unsigned vd = INSTR (4, 0); + unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); unsigned i; NYI_assert (29, 19, 0x5E0); NYI_assert (11, 10, 1); - switch (uimm (aarch64_get_instr (cpu), 15, 12)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (15, 12)) { case 0x0: /* 32-bit, no shift. */ case 0x2: /* 32-bit, shift by 8. */ case 0x4: /* 32-bit, shift by 16. */ case 0x6: /* 32-bit, shift by 24. */ - val <<= (8 * uimm (aarch64_get_instr (cpu), 14, 13)); + val <<= (8 * INSTR (14, 13)); val = ~ val; for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, val); @@ -3225,9 +3450,9 @@ do_vec_MVNI (sim_cpu *cpu) for (i = 0; i < 8; i++) if (val & (1 << i)) - mask |= (0xF << (i * 4)); + mask |= (0xFFUL << (i * 8)); aarch64_set_vec_u64 (cpu, vd, 0, mask); - aarch64_set_vec_u64 (cpu, vd, 1, 0); + aarch64_set_vec_u64 (cpu, vd, 1, mask); return; } @@ -3261,15 +3486,16 @@ do_vec_ABS (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. 
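The corrected mask expansion in do_vec_MVNI above replicates each bit of the 8-bit immediate into a full byte of the 64-bit pattern; the old code widened each bit to only a nibble. In isolation, as a hypothetical helper:

/* Illustrative only: bit i of 'val' becomes byte i of the result,
   as the 64-bit MOVI/MVNI immediate forms require.  */
static uint64_t
expand_byte_mask (unsigned val)
{
  uint64_t mask = 0;
  int i;
  for (i = 0; i < 8; i++)
    if (val & (1u << i))
      mask |= 0xFFULL << (i * 8);
  return mask;
}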
*/ - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); unsigned i; NYI_assert (29, 24, 0x0E); NYI_assert (21, 10, 0x82E); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -3310,45 +3536,48 @@ do_vec_ADDV (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Rd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (9, 5); + unsigned rd = INSTR (4, 0); unsigned i; - uint64_t val = 0; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 24, 0x0E); NYI_assert (21, 10, 0xC6E); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: - for (i = 0; i < (full ? 16 : 8); i++) - val += aarch64_get_vec_u8 (cpu, vm, i); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); - return; + { + uint8_t val = 0; + for (i = 0; i < (full ? 16 : 8); i++) + val += aarch64_get_vec_u8 (cpu, vm, i); + aarch64_set_vec_u64 (cpu, rd, 0, val); + return; + } case 1: - for (i = 0; i < (full ? 8 : 4); i++) - val += aarch64_get_vec_u16 (cpu, vm, i); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); - return; + { + uint16_t val = 0; + for (i = 0; i < (full ? 8 : 4); i++) + val += aarch64_get_vec_u16 (cpu, vm, i); + aarch64_set_vec_u64 (cpu, rd, 0, val); + return; + } case 2: - for (i = 0; i < (full ? 4 : 2); i++) - val += aarch64_get_vec_u32 (cpu, vm, i); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); - return; + { + uint32_t val = 0; + if (! full) + HALT_UNALLOC; + for (i = 0; i < 4; i++) + val += aarch64_get_vec_u32 (cpu, vm, i); + aarch64_set_vec_u64 (cpu, rd, 0, val); + return; + } case 3: - if (! full) - HALT_UNALLOC; - val = aarch64_get_vec_u64 (cpu, vm, 0); - val += aarch64_get_vec_u64 (cpu, vm, 1); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); - return; - - default: - HALT_UNREACHABLE; + HALT_UNALLOC; } } @@ -3364,55 +3593,71 @@ do_vec_ins_2 (sim_cpu *cpu) instr[4,0] = Vd. */ unsigned elem; - unsigned vm = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (9, 5); + unsigned vd = INSTR (4, 0); NYI_assert (31, 21, 0x270); NYI_assert (17, 14, 0); NYI_assert (12, 10, 7); - if (uimm (aarch64_get_instr (cpu), 13, 13) == 1) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (13, 13) == 1) { - if (uimm (aarch64_get_instr (cpu), 18, 18) == 1) + if (INSTR (18, 18) == 1) { /* 32-bit moves. */ - elem = uimm (aarch64_get_instr (cpu), 20, 19); + elem = INSTR (20, 19); aarch64_set_reg_u64 (cpu, vd, NO_SP, aarch64_get_vec_u32 (cpu, vm, elem)); } else { /* 64-bit moves. */ - if (uimm (aarch64_get_instr (cpu), 19, 19) != 1) + if (INSTR (19, 19) != 1) HALT_NYI; - elem = uimm (aarch64_get_instr (cpu), 20, 20); + elem = INSTR (20, 20); aarch64_set_reg_u64 (cpu, vd, NO_SP, aarch64_get_vec_u64 (cpu, vm, elem)); } } else { - if (uimm (aarch64_get_instr (cpu), 18, 18) == 1) + if (INSTR (18, 18) == 1) { /* 32-bit moves. */ - elem = uimm (aarch64_get_instr (cpu), 20, 19); + elem = INSTR (20, 19); aarch64_set_vec_u32 (cpu, vd, elem, aarch64_get_reg_u32 (cpu, vm, NO_SP)); } else { /* 64-bit moves. 
*/ - if (uimm (aarch64_get_instr (cpu), 19, 19) != 1) + if (INSTR (19, 19) != 1) HALT_NYI; - elem = uimm (aarch64_get_instr (cpu), 20, 20); + elem = INSTR (20, 20); aarch64_set_vec_u64 (cpu, vd, elem, aarch64_get_reg_u64 (cpu, vm, NO_SP)); } } } +#define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \ + do \ + { \ + DST_TYPE a[N], b[N]; \ + \ + for (i = 0; i < (N); i++) \ + { \ + a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \ + b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \ + } \ + for (i = 0; i < (N); i++) \ + aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \ + } \ + while (0) + static void do_vec_mull (sim_cpu *cpu) { @@ -3427,67 +3672,49 @@ do_vec_mull (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - int unsign = uimm (aarch64_get_instr (cpu), 29, 29); - int bias = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int unsign = INSTR (29, 29); + int bias = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR ( 9, 5); + unsigned vd = INSTR ( 4, 0); unsigned i; NYI_assert (28, 24, 0x0E); NYI_assert (15, 10, 0x30); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + /* NB: Read source values before writing results, in case + the source and destination vectors are the same. */ + switch (INSTR (23, 22)) { case 0: if (bias) bias = 8; if (unsign) - for (i = 0; i < 8; i++) - aarch64_set_vec_u16 (cpu, vd, i, - aarch64_get_vec_u8 (cpu, vn, i + bias) - * aarch64_get_vec_u8 (cpu, vm, i + bias)); + DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16); else - for (i = 0; i < 8; i++) - aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vn, i + bias) - * aarch64_get_vec_s8 (cpu, vm, i + bias)); + DO_VEC_WIDENING_MUL (8, int16_t, s8, s16); return; case 1: if (bias) bias = 4; if (unsign) - for (i = 0; i < 4; i++) - aarch64_set_vec_u32 (cpu, vd, i, - aarch64_get_vec_u16 (cpu, vn, i + bias) - * aarch64_get_vec_u16 (cpu, vm, i + bias)); + DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32); else - for (i = 0; i < 4; i++) - aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vn, i + bias) - * aarch64_get_vec_s16 (cpu, vm, i + bias)); + DO_VEC_WIDENING_MUL (4, int32_t, s16, s32); return; case 2: if (bias) bias = 2; if (unsign) - for (i = 0; i < 2; i++) - aarch64_set_vec_u64 (cpu, vd, i, - (uint64_t) aarch64_get_vec_u32 (cpu, vn, - i + bias) - * (uint64_t) aarch64_get_vec_u32 (cpu, vm, - i + bias)); + DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64); else - for (i = 0; i < 2; i++) - aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vn, i + bias) - * aarch64_get_vec_s32 (cpu, vm, i + bias)); + DO_VEC_WIDENING_MUL (2, int64_t, s32, s64); return; case 3: - default: HALT_NYI; } } @@ -3506,19 +3733,20 @@ do_vec_fadd (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. 
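The DO_VEC_WIDENING_MUL macro above buffers both source vectors in locals before writing any result lane: a widening multiply may name the same vector register as source and destination, and writing lane 0 at double width would clobber source lanes not yet read. The hazard in miniature, as a hypothetical helper:

/* Illustrative only: buffering inputs first keeps the loop correct
   even when the destination overlays a source.  */
static void
widen_mul_u8 (uint16_t *dst, const uint8_t *a_src, const uint8_t *b_src)
{
  uint16_t a[8], b[8];
  int i;
  for (i = 0; i < 8; i++)
    {
      a[i] = a_src[i];
      b[i] = b_src[i];
    }
  for (i = 0; i < 8; i++)
    dst[i] = (uint16_t) (a[i] * b[i]);
}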
*/ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x35); - if (uimm (aarch64_get_instr (cpu), 23, 23)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (23, 23)) { - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) { if (! full) HALT_NYI; @@ -3538,7 +3766,7 @@ do_vec_fadd (sim_cpu *cpu) } else { - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) { if (! full) HALT_NYI; @@ -3571,17 +3799,18 @@ do_vec_add (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x21); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -3610,9 +3839,6 @@ do_vec_add (sim_cpu *cpu) aarch64_get_vec_u64 (cpu, vn, 1) + aarch64_get_vec_u64 (cpu, vm, 1)); return; - - default: - HALT_UNREACHABLE; } } @@ -3629,49 +3855,32 @@ do_vec_mul (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); + int bias = 0; NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x27); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: - for (i = 0; i < (full ? 16 : 8); i++) - { - uint16_t val = aarch64_get_vec_u8 (cpu, vn, i); - val *= aarch64_get_vec_u8 (cpu, vm, i); - - aarch64_set_vec_u16 (cpu, vd, i, val); - } + DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8); return; case 1: - for (i = 0; i < (full ? 8 : 4); i++) - { - uint32_t val = aarch64_get_vec_u16 (cpu, vn, i); - val *= aarch64_get_vec_u16 (cpu, vm, i); - - aarch64_set_vec_u32 (cpu, vd, i, val); - } + DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16); return; case 2: - for (i = 0; i < (full ? 4 : 2); i++) - { - uint64_t val = aarch64_get_vec_u32 (cpu, vn, i); - val *= aarch64_get_vec_u32 (cpu, vm, i); - - aarch64_set_vec_u64 (cpu, vd, i, val); - } + DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32); return; - default: case 3: HALT_UNALLOC; } @@ -3690,53 +3899,44 @@ do_vec_MLA (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Vd. 
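The reworked do_vec_MLA below is a per-lane multiply-accumulate at the element width selected by instr[23,22]: Vd[i] = Vd[i] + Vn[i] * Vm[i], with the product truncated to the lane width rather than widened as the old code did. The byte case as a loop, hypothetical helper:

/* Illustrative only: MLA over bytes; arithmetic wraps modulo 256.  */
static void
mla_u8 (uint8_t *vd, const uint8_t *vn, const uint8_t *vm, int nelem)
{
  int i;
  for (i = 0; i < nelem; i++)
    vd[i] = (uint8_t) (vd[i] + vn[i] * vm[i]);
}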
*/ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x25); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) - { - uint16_t val = aarch64_get_vec_u8 (cpu, vn, i); - val *= aarch64_get_vec_u8 (cpu, vm, i); - val += aarch64_get_vec_u8 (cpu, vd, i); - - aarch64_set_vec_u16 (cpu, vd, i, val); - } + aarch64_set_vec_u8 (cpu, vd, i, + aarch64_get_vec_u8 (cpu, vd, i) + + (aarch64_get_vec_u8 (cpu, vn, i) + * aarch64_get_vec_u8 (cpu, vm, i))); return; case 1: for (i = 0; i < (full ? 8 : 4); i++) - { - uint32_t val = aarch64_get_vec_u16 (cpu, vn, i); - val *= aarch64_get_vec_u16 (cpu, vm, i); - val += aarch64_get_vec_u16 (cpu, vd, i); - - aarch64_set_vec_u32 (cpu, vd, i, val); - } + aarch64_set_vec_u16 (cpu, vd, i, + aarch64_get_vec_u16 (cpu, vd, i) + + (aarch64_get_vec_u16 (cpu, vn, i) + * aarch64_get_vec_u16 (cpu, vm, i))); return; case 2: for (i = 0; i < (full ? 4 : 2); i++) - { - uint64_t val = aarch64_get_vec_u32 (cpu, vn, i); - val *= aarch64_get_vec_u32 (cpu, vm, i); - val += aarch64_get_vec_u32 (cpu, vd, i); - - aarch64_set_vec_u64 (cpu, vd, i, val); - } + aarch64_set_vec_u32 (cpu, vd, i, + aarch64_get_vec_u32 (cpu, vd, i) + + (aarch64_get_vec_u32 (cpu, vn, i) + * aarch64_get_vec_u32 (cpu, vm, i))); return; default: - case 3: HALT_UNALLOC; } } @@ -3744,13 +3944,13 @@ do_vec_MLA (sim_cpu *cpu) static float fmaxnm (float a, float b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a > b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -3758,13 +3958,13 @@ fmaxnm (float a, float b) static float fminnm (float a, float b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a < b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -3772,13 +3972,13 @@ fminnm (float a, float b) static double dmaxnm (double a, double b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a > b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -3786,13 +3986,13 @@ dmaxnm (double a, double b) static double dminnm (double a, double b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a < b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! 
isnan (b)) return b; return a; } @@ -3800,29 +4000,30 @@ dminnm (double a, double b) static void do_vec_FminmaxNMP (sim_cpu *cpu) { - /* aarch64_get_instr (cpu)[31] = 0 - aarch64_get_instr (cpu)[30] = half (0)/full (1) - aarch64_get_instr (cpu)[29,24] = 10 1110 - aarch64_get_instr (cpu)[23] = max(0)/min(1) - aarch64_get_instr (cpu)[22] = float (0)/double (1) - aarch64_get_instr (cpu)[21] = 1 - aarch64_get_instr (cpu)[20,16] = Vn - aarch64_get_instr (cpu)[15,10] = 1100 01 - aarch64_get_instr (cpu)[9,5] = Vm - aarch64_get_instr (cpu)[4.0] = Vd. */ - - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - int full = uimm (aarch64_get_instr (cpu), 30, 30); + /* instr [31] = 0 + instr [30] = half (0)/full (1) + instr [29,24] = 10 1110 + instr [23] = max(0)/min(1) + instr [22] = float (0)/double (1) + instr [21] = 1 + instr [20,16] = Vn + instr [15,10] = 1100 01 + instr [9,5] = Vm + instr [4.0] = Vd. */ + + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + int full = INSTR (30, 30); NYI_assert (29, 24, 0x2E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x31); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { - double (* fn)(double, double) = uimm (aarch64_get_instr (cpu), 23, 23) + double (* fn)(double, double) = INSTR (23, 23) ? dminnm : dmaxnm; if (! full) @@ -3836,7 +4037,7 @@ do_vec_FminmaxNMP (sim_cpu *cpu) } else { - float (* fn)(float, float) = uimm (aarch64_get_instr (cpu), 23, 23) + float (* fn)(float, float) = INSTR (23, 23) ? fminnm : fmaxnm; aarch64_set_vec_float (cpu, vd, 0, @@ -3868,15 +4069,16 @@ do_vec_AND (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 21, 0x071); NYI_assert (15, 10, 0x07); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) @@ -3894,15 +4096,16 @@ do_vec_BSL (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 21, 0x173); NYI_assert (15, 10, 0x07); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, ( aarch64_get_vec_u8 (cpu, vd, i) @@ -3922,15 +4125,16 @@ do_vec_EOR (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. 
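The switch from "fpclassify (x) == FP_NORMAL" to "! isnan (x)" in the fmaxnm/fminnm helpers above matters because FP_NORMAL also excludes zeros, subnormals and infinities, all of which FMAXNM/FMINNM must compare normally; only NaN operands get the "return the other value" treatment. The difference at a glance:

#include <math.h>
/* Illustrative only: 0.0f fails the old test but is a perfectly
   ordinary operand for FMAXNM.  */
static int old_is_number (float x) { return fpclassify (x) == FP_NORMAL; }
static int new_is_number (float x) { return ! isnan (x); }
/* old_is_number (0.0f) == 0, but new_is_number (0.0f) == 1.  */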
*/ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 21, 0x171); NYI_assert (15, 10, 0x07); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) @@ -3950,28 +4154,29 @@ do_vec_bit (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned test_false = uimm (aarch64_get_instr (cpu), 22, 22); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); + unsigned test_false = INSTR (22, 22); unsigned i; NYI_assert (29, 23, 0x5D); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x07); - if (test_false) - { - for (i = 0; i < (full ? 16 : 8); i++) - if (aarch64_get_vec_u32 (cpu, vn, i) == 0) - aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i)); - } - else + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + for (i = 0; i < (full ? 4 : 2); i++) { - for (i = 0; i < (full ? 16 : 8); i++) - if (aarch64_get_vec_u32 (cpu, vn, i) != 0) - aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i)); + uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i); + uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i); + uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i); + if (test_false) + aarch64_set_vec_u32 (cpu, vd, i, + (vd_val & vm_val) | (vn_val & ~vm_val)); + else + aarch64_set_vec_u32 (cpu, vd, i, + (vd_val & ~vm_val) | (vn_val & vm_val)); } } @@ -3986,15 +4191,16 @@ do_vec_ORN (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 21, 0x077); NYI_assert (15, 10, 0x07); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) @@ -4012,15 +4218,16 @@ do_vec_ORR (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 21, 0x075); NYI_assert (15, 10, 0x07); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) @@ -4038,15 +4245,16 @@ do_vec_BIC (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. 
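The rewritten do_vec_bit above implements BIT and BIF as genuine bitwise selects rather than the old whole-element zero tests: BIT copies Vn bits where the corresponding Vm bit is set, BIF where it is clear, and untouched bits keep their Vd value. One 32-bit word of it, as a hypothetical helper:

/* Illustrative only: the per-word select used by the patched loop.  */
static uint32_t
bit_select (uint32_t vd, uint32_t vn, uint32_t vm, int test_false)
{
  if (test_false)
    return (vd & vm) | (vn & ~vm);    /* BIF.  */
  return (vd & ~vm) | (vn & vm);      /* BIT.  */
}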
*/ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 21, 0x073); NYI_assert (15, 10, 0x07); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) @@ -4064,53 +4272,86 @@ do_vec_XTN (sim_cpu *cpu) instr[9,5] = Vs instr[4,0] = Vd. */ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned bias = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned bias = INSTR (30, 30); unsigned i; NYI_assert (29, 24, 0x0E); NYI_assert (21, 10, 0x84A); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: - if (bias) - for (i = 0; i < 8; i++) - aarch64_set_vec_u8 (cpu, vd, i + 8, - aarch64_get_vec_u16 (cpu, vs, i) >> 8); - else - for (i = 0; i < 8; i++) - aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i)); + for (i = 0; i < 8; i++) + aarch64_set_vec_u8 (cpu, vd, i + (bias * 8), + aarch64_get_vec_u16 (cpu, vs, i)); return; case 1: - if (bias) - for (i = 0; i < 4; i++) - aarch64_set_vec_u16 (cpu, vd, i + 4, - aarch64_get_vec_u32 (cpu, vs, i) >> 16); - else - for (i = 0; i < 4; i++) - aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i)); + for (i = 0; i < 4; i++) + aarch64_set_vec_u16 (cpu, vd, i + (bias * 4), + aarch64_get_vec_u32 (cpu, vs, i)); return; case 2: - if (bias) - for (i = 0; i < 2; i++) - aarch64_set_vec_u32 (cpu, vd, i + 4, - aarch64_get_vec_u64 (cpu, vs, i) >> 32); - else - for (i = 0; i < 2; i++) - aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i)); + for (i = 0; i < 2; i++) + aarch64_set_vec_u32 (cpu, vd, i + (bias * 2), + aarch64_get_vec_u64 (cpu, vs, i)); return; - - default: - HALT_UNALLOC; } } -#define MAX(A,B) ((A) > (B) ? (A) : (B)) -#define MIN(A,B) ((A) < (B) ? (A) : (B)) +/* Return the number of bits set in the input value. */ +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# define popcount __builtin_popcount +#else +static int +popcount (unsigned char x) +{ + static const unsigned char popcnt[16] = + { + 0, 1, 1, 2, + 1, 2, 2, 3, + 1, 2, 2, 3, + 2, 3, 3, 4 + }; + + /* Only counts the low 8 bits of the input as that is all we need. */ + return popcnt[x % 16] + popcnt[x / 16]; +} +#endif + +static void +do_vec_CNT (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/ full (1) + instr[29,24] = 00 1110 + instr[23,22] = size: byte(00) + instr[21,10] = 1000 0001 0110 + instr[9,5] = Vs + instr[4,0] = Vd. */ + + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + int full = INSTR (30, 30); + int size = INSTR (23, 22); + int i; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 10, 0x816); + + if (size != 0) + HALT_UNALLOC; + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, + popcount (aarch64_get_vec_u8 (cpu, vs, i))); +} static void do_vec_maxv (sim_cpu *cpu) @@ -4127,9 +4368,9 @@ do_vec_maxv (sim_cpu *cpu) instr[9,5] = V source instr[4.0] = R dest. 
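The popcount fallback above only ever sees byte operands: it splits the value into two nibbles and sums the 16-entry table lookups, and do_vec_CNT applies it to every byte lane. A few sanity values, as a hypothetical self-check:

#include <assert.h>
/* Illustrative only: expected results from the nibble-table fallback.  */
static void
check_popcount (void)
{
  assert (popcount (0x00) == 0);
  assert (popcount (0x0F) == 4);
  assert (popcount (0xB5) == 5);   /* 1011 0101 -> 3 + 2 bits set.  */
  assert (popcount (0xFF) == 8);
}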
*/ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned full = INSTR (30, 30); unsigned i; NYI_assert (28, 24, 0x0E); @@ -4137,30 +4378,29 @@ do_vec_maxv (sim_cpu *cpu) NYI_assert (20, 17, 8); NYI_assert (15, 10, 0x2A); - switch ((uimm (aarch64_get_instr (cpu), 29, 29) << 1) - | uimm (aarch64_get_instr (cpu), 16, 16)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch ((INSTR (29, 29) << 1) | INSTR (16, 16)) { case 0: /* SMAXV. */ { int64_t smax; - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: smax = aarch64_get_vec_s8 (cpu, vs, 0); for (i = 1; i < (full ? 16 : 8); i++) - smax = MAX (smax, aarch64_get_vec_s8 (cpu, vs, i)); + smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i)); break; case 1: smax = aarch64_get_vec_s16 (cpu, vs, 0); for (i = 1; i < (full ? 8 : 4); i++) - smax = MAX (smax, aarch64_get_vec_s16 (cpu, vs, i)); + smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i)); break; case 2: smax = aarch64_get_vec_s32 (cpu, vs, 0); for (i = 1; i < (full ? 4 : 2); i++) - smax = MAX (smax, aarch64_get_vec_s32 (cpu, vs, i)); + smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i)); break; - default: case 3: HALT_UNALLOC; } @@ -4171,24 +4411,24 @@ do_vec_maxv (sim_cpu *cpu) case 1: /* SMINV. */ { int64_t smin; - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: smin = aarch64_get_vec_s8 (cpu, vs, 0); for (i = 1; i < (full ? 16 : 8); i++) - smin = MIN (smin, aarch64_get_vec_s8 (cpu, vs, i)); + smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i)); break; case 1: smin = aarch64_get_vec_s16 (cpu, vs, 0); for (i = 1; i < (full ? 8 : 4); i++) - smin = MIN (smin, aarch64_get_vec_s16 (cpu, vs, i)); + smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i)); break; case 2: smin = aarch64_get_vec_s32 (cpu, vs, 0); for (i = 1; i < (full ? 4 : 2); i++) - smin = MIN (smin, aarch64_get_vec_s32 (cpu, vs, i)); + smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i)); break; - default: + case 3: HALT_UNALLOC; } @@ -4199,24 +4439,24 @@ do_vec_maxv (sim_cpu *cpu) case 2: /* UMAXV. */ { uint64_t umax; - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: umax = aarch64_get_vec_u8 (cpu, vs, 0); for (i = 1; i < (full ? 16 : 8); i++) - umax = MAX (umax, aarch64_get_vec_u8 (cpu, vs, i)); + umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i)); break; case 1: umax = aarch64_get_vec_u16 (cpu, vs, 0); for (i = 1; i < (full ? 8 : 4); i++) - umax = MAX (umax, aarch64_get_vec_u16 (cpu, vs, i)); + umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i)); break; case 2: umax = aarch64_get_vec_u32 (cpu, vs, 0); for (i = 1; i < (full ? 4 : 2); i++) - umax = MAX (umax, aarch64_get_vec_u32 (cpu, vs, i)); + umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i)); break; - default: + case 3: HALT_UNALLOC; } @@ -4227,33 +4467,30 @@ do_vec_maxv (sim_cpu *cpu) case 3: /* UMINV. */ { uint64_t umin; - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: umin = aarch64_get_vec_u8 (cpu, vs, 0); for (i = 1; i < (full ? 16 : 8); i++) - umin = MIN (umin, aarch64_get_vec_u8 (cpu, vs, i)); + umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i)); break; case 1: umin = aarch64_get_vec_u16 (cpu, vs, 0); for (i = 1; i < (full ? 
8 : 4); i++) - umin = MIN (umin, aarch64_get_vec_u16 (cpu, vs, i)); + umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i)); break; case 2: umin = aarch64_get_vec_u32 (cpu, vs, 0); for (i = 1; i < (full ? 4 : 2); i++) - umin = MIN (umin, aarch64_get_vec_u32 (cpu, vs, i)); + umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i)); break; - default: + case 3: HALT_UNALLOC; } aarch64_set_reg_u64 (cpu, rd, NO_SP, umin); return; } - - default: - HALT_UNALLOC; } } @@ -4268,8 +4505,8 @@ do_vec_fminmaxV (sim_cpu *cpu) instr[9,5] = V source instr[4.0] = R dest. */ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); unsigned i; float res = aarch64_get_vec_float (cpu, vs, 0); @@ -4277,9 +4514,10 @@ do_vec_fminmaxV (sim_cpu *cpu) NYI_assert (22, 14, 0x0C3); NYI_assert (11, 10, 2); - if (uimm (aarch64_get_instr (cpu), 23, 23)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (23, 23)) { - switch (uimm (aarch64_get_instr (cpu), 13, 12)) + switch (INSTR (13, 12)) { case 0: /* FMNINNMV. */ for (i = 1; i < 4; i++) @@ -4288,7 +4526,7 @@ do_vec_fminmaxV (sim_cpu *cpu) case 3: /* FMINV. */ for (i = 1; i < 4; i++) - res = MIN (res, aarch64_get_vec_float (cpu, vs, i)); + res = min (res, aarch64_get_vec_float (cpu, vs, i)); break; default: @@ -4297,7 +4535,7 @@ do_vec_fminmaxV (sim_cpu *cpu) } else { - switch (uimm (aarch64_get_instr (cpu), 13, 12)) + switch (INSTR (13, 12)) { case 0: /* FMNAXNMV. */ for (i = 1; i < 4; i++) @@ -4306,7 +4544,7 @@ do_vec_fminmaxV (sim_cpu *cpu) case 3: /* FMAXV. */ for (i = 1; i < 4; i++) - res = MAX (res, aarch64_get_vec_float (cpu, vs, i)); + res = max (res, aarch64_get_vec_float (cpu, vs, i)); break; default: @@ -4333,11 +4571,11 @@ do_vec_Fminmax (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned min = uimm (aarch64_get_instr (cpu), 23, 23); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); + unsigned min = INSTR (23, 23); unsigned i; NYI_assert (29, 24, 0x0E); @@ -4345,16 +4583,17 @@ do_vec_Fminmax (sim_cpu *cpu) NYI_assert (15, 14, 3); NYI_assert (11, 10, 1); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { double (* func)(double, double); if (! full) HALT_NYI; - if (uimm (aarch64_get_instr (cpu), 13, 12) == 0) + if (INSTR (13, 12) == 0) func = min ? dminnm : dmaxnm; - else if (uimm (aarch64_get_instr (cpu), 13, 12) == 3) + else if (INSTR (13, 12) == 3) func = min ? fmin : fmax; else HALT_NYI; @@ -4368,9 +4607,9 @@ do_vec_Fminmax (sim_cpu *cpu) { float (* func)(float, float); - if (uimm (aarch64_get_instr (cpu), 13, 12) == 0) + if (INSTR (13, 12) == 0) func = min ? fminnm : fmaxnm; - else if (uimm (aarch64_get_instr (cpu), 13, 12) == 3) + else if (INSTR (13, 12) == 3) func = min ? fminf : fmaxf; else HALT_NYI; @@ -4393,15 +4632,16 @@ do_vec_SCVTF (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd. 
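do_vec_SCVTF below converts each signed integer lane to the floating-point value of the same width, with instr[22] selecting 32- or 64-bit lanes and instr[30] the vector length. One lane size as a loop, hypothetical helper:

/* Illustrative only: SCVTF over four 32-bit lanes.  */
static void
scvtf_4s (float *vd, const int32_t *vn)
{
  int i;
  for (i = 0; i < 4; i++)
    vd[i] = (float) vn[i];
}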
*/ - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned size = uimm (aarch64_get_instr (cpu), 22, 22); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); + unsigned size = INSTR (22, 22); unsigned i; NYI_assert (29, 23, 0x1C); NYI_assert (21, 10, 0x876); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); if (size) { if (! full) @@ -4462,8 +4702,6 @@ do_vec_SCVTF (sim_cpu *cpu) aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \ ? -1ULL : 0); \ return; \ - default: \ - HALT_UNALLOC; \ } \ } \ while (0) @@ -4499,8 +4737,6 @@ do_vec_SCVTF (sim_cpu *cpu) aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \ CMP 0 ? -1ULL : 0); \ return; \ - default: \ - HALT_UNALLOC; \ } \ } \ while (0) @@ -4510,7 +4746,7 @@ do_vec_SCVTF (sim_cpu *cpu) { \ if (vm != 0) \ HALT_NYI; \ - if (uimm (aarch64_get_instr (cpu), 22, 22)) \ + if (INSTR (22, 22)) \ { \ if (! full) \ HALT_NYI; \ @@ -4533,7 +4769,7 @@ do_vec_SCVTF (sim_cpu *cpu) #define VEC_FCMP(CMP) \ do \ { \ - if (uimm (aarch64_get_instr (cpu), 22, 22)) \ + if (INSTR (22, 22)) \ { \ if (! full) \ HALT_NYI; \ @@ -4572,31 +4808,32 @@ do_vec_compare (sim_cpu *cpu) instr[9,5] = Vn instr[4.0] = Vd. */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - int size = uimm (aarch64_get_instr (cpu), 23, 22); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + int size = INSTR (23, 22); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (28, 24, 0x0E); NYI_assert (21, 21, 1); - if ((uimm (aarch64_get_instr (cpu), 11, 11) - && uimm (aarch64_get_instr (cpu), 14, 14)) - || ((uimm (aarch64_get_instr (cpu), 11, 11) == 0 - && uimm (aarch64_get_instr (cpu), 10, 10) == 0))) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if ((INSTR (11, 11) + && INSTR (14, 14)) + || ((INSTR (11, 11) == 0 + && INSTR (10, 10) == 0))) { /* A compare vs 0. */ if (vm != 0) { - if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x2A) + if (INSTR (15, 10) == 0x2A) do_vec_maxv (cpu); - else if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x32 - || uimm (aarch64_get_instr (cpu), 15, 10) == 0x3E) + else if (INSTR (15, 10) == 0x32 + || INSTR (15, 10) == 0x3E) do_vec_fminmaxV (cpu); - else if (uimm (aarch64_get_instr (cpu), 29, 23) == 0x1C - && uimm (aarch64_get_instr (cpu), 21, 10) == 0x876) + else if (INSTR (29, 23) == 0x1C + && INSTR (21, 10) == 0x876) do_vec_SCVTF (cpu); else HALT_NYI; @@ -4604,12 +4841,11 @@ do_vec_compare (sim_cpu *cpu) } } - if (uimm (aarch64_get_instr (cpu), 14, 14)) + if (INSTR (14, 14)) { /* A floating point compare. 
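/* The VEC_CMP/VEC_CMP0 macros trimmed above write an all-ones or all-zero
   mask into each destination lane according to the per-lane test.  A
   standalone sketch of a CMGT-style signed 32-bit compare (hypothetical
   helper):  */

#include <stdint.h>

static void
cmgt_s32 (uint32_t *vd, const int32_t *vn, const int32_t *vm, unsigned nlanes)
{
  unsigned i;

  /* True lanes become 0xFFFFFFFF, false lanes become 0.  */
  for (i = 0; i < nlanes; i++)
    vd[i] = vn[i] > vm[i] ? 0xFFFFFFFFu : 0;
}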
*/ - unsigned decode = (uimm (aarch64_get_instr (cpu), 29, 29) << 5) - | (uimm (aarch64_get_instr (cpu), 23, 23) << 4) - | uimm (aarch64_get_instr (cpu), 13, 10); + unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4) + | INSTR (13, 10); NYI_assert (15, 15, 1); @@ -4630,14 +4866,14 @@ do_vec_compare (sim_cpu *cpu) } else { - unsigned decode = (uimm (aarch64_get_instr (cpu), 29, 29) << 6) - | uimm (aarch64_get_instr (cpu), 15, 10); + unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10); switch (decode) { case 0x0D: /* 0001101 GT */ VEC_CMP (s, > ); case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= ); case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > ); + case 0x23: /* 0100011 TST */ VEC_CMP (u, & ); case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == ); case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < ); case 0x4D: /* 1001101 HI */ VEC_CMP (u, > ); @@ -4666,11 +4902,12 @@ do_vec_SSHL (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; + signed int shift; NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 1); @@ -4678,36 +4915,62 @@ do_vec_SSHL (sim_cpu *cpu) /* FIXME: What is a signed shift left in this context ?. */ - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) - aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) - << aarch64_get_vec_s8 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i); + if (shift >= 0) + aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) + << shift); + else + aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) + >> - shift); + } return; case 1: for (i = 0; i < (full ? 8 : 4); i++) - aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) - << aarch64_get_vec_s16 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i * 2); + if (shift >= 0) + aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) + << shift); + else + aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) + >> - shift); + } return; case 2: for (i = 0; i < (full ? 4 : 2); i++) - aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) - << aarch64_get_vec_s32 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i * 4); + if (shift >= 0) + aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) + << shift); + else + aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) + >> - shift); + } return; case 3: if (! 
full) HALT_UNALLOC; for (i = 0; i < 2; i++) - aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) - << aarch64_get_vec_s64 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i * 8); + if (shift >= 0) + aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) + << shift); + else + aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) + >> - shift); + } return; - - default: - HALT_NYI; } } @@ -4724,45 +4987,72 @@ do_vec_USHL (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; + signed int shift; NYI_assert (29, 24, 0x2E); NYI_assert (15, 10, 0x11); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: - for (i = 0; i < (full ? 16 : 8); i++) - aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) - << aarch64_get_vec_u8 (cpu, vm, i)); + for (i = 0; i < (full ? 16 : 8); i++) + { + shift = aarch64_get_vec_s8 (cpu, vm, i); + if (shift >= 0) + aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) + << shift); + else + aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) + >> - shift); + } return; case 1: for (i = 0; i < (full ? 8 : 4); i++) - aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) - << aarch64_get_vec_u16 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i * 2); + if (shift >= 0) + aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) + << shift); + else + aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) + >> - shift); + } return; case 2: for (i = 0; i < (full ? 4 : 2); i++) - aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) - << aarch64_get_vec_u32 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i * 4); + if (shift >= 0) + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) + << shift); + else + aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) + >> - shift); + } return; case 3: if (! full) HALT_UNALLOC; for (i = 0; i < 2; i++) - aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) - << aarch64_get_vec_u64 (cpu, vm, i)); + { + shift = aarch64_get_vec_s8 (cpu, vm, i * 8); + if (shift >= 0) + aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) + << shift); + else + aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) + >> - shift); + } return; - - default: - HALT_NYI; } } @@ -4779,17 +5069,18 @@ do_vec_FMLA (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 23, 0x1C); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x33); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! full) HALT_UNALLOC; @@ -4823,19 +5114,20 @@ do_vec_max (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Vd. 
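/* Both SSHL and USHL, as rewritten above, take the shift count from the
   *signed* low byte of the corresponding Vm element: a non-negative count
   shifts left, a negative count shifts right by its magnitude.  One
   unsigned 32-bit lane of the same idea (like the hunk, this assumes the
   count magnitude stays below the lane width):  */

#include <stdint.h>

static uint32_t
ushl_lane_u32 (uint32_t val, int8_t shift)
{
  /* A negative count encodes a right shift by -shift.  */
  return shift >= 0 ? val << shift : val >> - shift;
}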
*/ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (28, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x19); - if (uimm (aarch64_get_instr (cpu), 29, 29)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (29, 29)) { - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -4864,14 +5156,13 @@ do_vec_max (sim_cpu *cpu) : aarch64_get_vec_u32 (cpu, vm, i)); return; - default: case 3: HALT_UNALLOC; } } else { - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -4900,7 +5191,6 @@ do_vec_max (sim_cpu *cpu) : aarch64_get_vec_s32 (cpu, vm, i)); return; - default: case 3: HALT_UNALLOC; } @@ -4921,19 +5211,20 @@ do_vec_min (sim_cpu *cpu) instr[9,5] = Vm instr[4.0] = Vd. */ - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (28, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x1B); - if (uimm (aarch64_get_instr (cpu), 29, 29)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (29, 29)) { - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -4962,14 +5253,13 @@ do_vec_min (sim_cpu *cpu) : aarch64_get_vec_u32 (cpu, vm, i)); return; - default: case 3: HALT_UNALLOC; } } else { - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -4998,7 +5288,6 @@ do_vec_min (sim_cpu *cpu) : aarch64_get_vec_s32 (cpu, vm, i)); return; - default: case 3: HALT_UNALLOC; } @@ -5019,10 +5308,10 @@ do_vec_sub_long (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = V dest. */ - unsigned size = uimm (aarch64_get_instr (cpu), 23, 22); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned size = INSTR (23, 22); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned bias = 0; unsigned i; @@ -5033,7 +5322,8 @@ do_vec_sub_long (sim_cpu *cpu) if (size == 3) HALT_UNALLOC; - switch (uimm (aarch64_get_instr (cpu), 30, 29)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (30, 29)) { case 2: /* SSUBL2. */ bias = 2; @@ -5103,21 +5393,6 @@ do_vec_sub_long (sim_cpu *cpu) } } -#define DO_ADDP(FN) \ - do \ - { \ - for (i = 0; i < range; i++) \ - { \ - aarch64_set_vec_##FN (cpu, vd, i, \ - aarch64_get_vec_##FN (cpu, vn, i * 2) \ - + aarch64_get_vec_##FN (cpu, vn, i * 2 + 1)); \ - aarch64_set_vec_##FN (cpu, vd, i + range, \ - aarch64_get_vec_##FN (cpu, vm, i * 2) \ - + aarch64_get_vec_##FN (cpu, vm, i * 2 + 1)); \ - } \ - } \ - while (0) - static void do_vec_ADDP (sim_cpu *cpu) { @@ -5131,91 +5406,106 @@ do_vec_ADDP (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = V dest. 
*/ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned size = uimm (aarch64_get_instr (cpu), 23, 22); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + FRegister copy_vn; + FRegister copy_vm; + unsigned full = INSTR (30, 30); + unsigned size = INSTR (23, 22); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i, range; NYI_assert (29, 24, 0x0E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x2F); + /* Make copies of the source registers in case vd == vn/vm. */ + copy_vn = cpu->fr[vn]; + copy_vm = cpu->fr[vm]; + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); switch (size) { case 0: range = full ? 8 : 4; - DO_ADDP (u8); + for (i = 0; i < range; i++) + { + aarch64_set_vec_u8 (cpu, vd, i, + copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]); + aarch64_set_vec_u8 (cpu, vd, i + range, + copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]); + } return; case 1: range = full ? 4 : 2; - DO_ADDP (u16); + for (i = 0; i < range; i++) + { + aarch64_set_vec_u16 (cpu, vd, i, + copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]); + aarch64_set_vec_u16 (cpu, vd, i + range, + copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]); + } return; case 2: range = full ? 2 : 1; - DO_ADDP (u32); + for (i = 0; i < range; i++) + { + aarch64_set_vec_u32 (cpu, vd, i, + copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]); + aarch64_set_vec_u32 (cpu, vd, i + range, + copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]); + } return; case 3: if (! full) HALT_UNALLOC; - range = 1; - DO_ADDP (u64); + aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]); + aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]); return; - - default: - HALT_NYI; } } +/* Float point vector convert to longer (precision). */ static void -do_vec_UMOV (sim_cpu *cpu) +do_vec_FCVTL (sim_cpu *cpu) { /* instr[31] = 0 - instr[30] = 32-bit(0)/64-bit(1) - instr[29,21] = 00 1110 000 - insrt[20,16] = size & index - instr[15,10] = 0011 11 - instr[9,5] = V source - instr[4,0] = R dest. */ + instr[30] = half (0) / all (1) + instr[29,23] = 00 1110 0 + instr[22] = single (0) / double (1) + instr[21,10] = 10 0001 0111 10 + instr[9,5] = Rn + instr[4,0] = Rd. */ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned index; + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned full = INSTR (30, 30); + unsigned i; - NYI_assert (29, 21, 0x070); - NYI_assert (15, 10, 0x0F); + NYI_assert (31, 31, 0); + NYI_assert (29, 23, 0x1C); + NYI_assert (21, 10, 0x85E); - if (uimm (aarch64_get_instr (cpu), 16, 16)) - { - /* Byte transfer. 
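/* ADDP adds adjacent element pairs from Vn into the low half of Vd and
   from Vm into the high half, so a naive in-place loop reads lanes it has
   already overwritten whenever Vd aliases Vn or Vm.  The hunk snapshots
   both sources first; the same pattern in miniature for the full-width
   32-bit case (hypothetical helper):  */

#include <stdint.h>
#include <string.h>

static void
addp_u32 (uint32_t *vd, const uint32_t *vn, const uint32_t *vm)
{
  uint32_t cn[4], cm[4];
  unsigned i;

  /* Snapshot the sources in case vd == vn or vd == vm.  */
  memcpy (cn, vn, sizeof cn);
  memcpy (cm, vm, sizeof cm);

  for (i = 0; i < 2; i++)
    {
      vd[i]     = cn[i * 2] + cn[i * 2 + 1];
      vd[i + 2] = cm[i * 2] + cm[i * 2 + 1];
    }
}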
*/ - index = uimm (aarch64_get_instr (cpu), 20, 17); - aarch64_set_reg_u64 (cpu, rd, NO_SP, - aarch64_get_vec_u8 (cpu, vs, index)); - } - else if (uimm (aarch64_get_instr (cpu), 17, 17)) - { - index = uimm (aarch64_get_instr (cpu), 20, 18); - aarch64_set_reg_u64 (cpu, rd, NO_SP, - aarch64_get_vec_u16 (cpu, vs, index)); - } - else if (uimm (aarch64_get_instr (cpu), 18, 18)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { - index = uimm (aarch64_get_instr (cpu), 20, 19); - aarch64_set_reg_u64 (cpu, rd, NO_SP, - aarch64_get_vec_u32 (cpu, vs, index)); + for (i = 0; i < 2; i++) + aarch64_set_vec_double (cpu, rd, i, + aarch64_get_vec_float (cpu, rn, i + 2*full)); } else { - if (uimm (aarch64_get_instr (cpu), 30, 30) != 1) - HALT_UNALLOC; + HALT_NYI; - index = uimm (aarch64_get_instr (cpu), 20, 20); - aarch64_set_reg_u64 (cpu, rd, NO_SP, - aarch64_get_vec_u64 (cpu, vs, index)); +#if 0 + /* TODO: Implement missing half-float support. */ + for (i = 0; i < 4; i++) + aarch64_set_vec_float (cpu, rd, i, + aarch64_get_vec_halffloat (cpu, rn, i + 4*full)); +#endif } } @@ -5231,15 +5521,16 @@ do_vec_FABS (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd. */ - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); unsigned i; NYI_assert (29, 23, 0x1D); NYI_assert (21, 10, 0x83E); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! full) HALT_NYI; @@ -5267,16 +5558,17 @@ do_vec_FCVTZS (sim_cpu *cpu) instr[9,5] = Rn instr[4,0] = Rd. */ - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned full = INSTR (30, 30); unsigned i; NYI_assert (31, 31, 0); NYI_assert (29, 23, 0x1D); NYI_assert (21, 10, 0x86E); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! full) HALT_UNALLOC; @@ -5291,6 +5583,92 @@ do_vec_FCVTZS (sim_cpu *cpu) (int32_t) aarch64_get_vec_float (cpu, rn, i)); } +static void +do_vec_REV64 (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half + instr[29,24] = 00 1110 + instr[23,22] = size + instr[21,10] = 10 0000 0000 10 + instr[9,5] = Rn + instr[4,0] = Rd. */ + + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned size = INSTR (23, 22); + unsigned full = INSTR (30, 30); + unsigned i; + FRegister val; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 10, 0x802); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (size) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i); + break; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i); + break; + + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i); + break; + + case 3: + HALT_UNALLOC; + } + + aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); + if (full) + aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); +} + +static void +do_vec_REV16 (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half + instr[29,24] = 00 1110 + instr[23,22] = size + instr[21,10] = 10 0000 0001 10 + instr[9,5] = Rn + instr[4,0] = Rd. 
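/* FCVTL/FCVTL2 widen the low (full == 0) or high (full == 1) half of the
   source from single to double precision, hence the "i + 2*full" lane
   selection above.  A sketch that reads both source lanes before writing:
   note the per-lane loop in the hunk would clobber source lane 1 when
   rd == rn and the low half is selected, so the snapshot here is a
   deliberate extra safeguard, not the hunk's own behaviour:  */

static void
fcvtl_sketch (double *vd, const float *vn, int high_half)
{
  /* Read first: vd may overlay vn when the instruction names the same
     register for source and destination.  */
  float lo = vn[0 + 2 * high_half];
  float hi = vn[1 + 2 * high_half];

  vd[0] = lo;
  vd[1] = hi;
}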
*/ + + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned size = INSTR (23, 22); + unsigned full = INSTR (30, 30); + unsigned i; + FRegister val; + + NYI_assert (29, 24, 0x0E); + NYI_assert (21, 10, 0x806); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (size) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i); + break; + + default: + HALT_UNALLOC; + } + + aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); + if (full) + aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); +} + static void do_vec_op1 (sim_cpu *cpu) { @@ -5304,29 +5682,22 @@ do_vec_op1 (sim_cpu *cpu) instr[4,0] = Vd */ NYI_assert (29, 24, 0x0E); - if (uimm (aarch64_get_instr (cpu), 21, 21) == 0) + if (INSTR (21, 21) == 0) { - if (uimm (aarch64_get_instr (cpu), 23, 22) == 0) + if (INSTR (23, 22) == 0) { - if (uimm (aarch64_get_instr (cpu), 30, 30) == 1 - && uimm (aarch64_get_instr (cpu), 17, 14) == 0 - && uimm (aarch64_get_instr (cpu), 12, 10) == 7) + if (INSTR (30, 30) == 1 + && INSTR (17, 14) == 0 + && INSTR (12, 10) == 7) return do_vec_ins_2 (cpu); - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + switch (INSTR (15, 10)) { case 0x01: do_vec_DUP_vector_into_vector (cpu); return; case 0x03: do_vec_DUP_scalar_into_vector (cpu); return; case 0x07: do_vec_INS (cpu); return; - case 0x0A: do_vec_TRN (cpu); return; - - case 0x0F: - if (uimm (aarch64_get_instr (cpu), 17, 16) == 0) - { - do_vec_MOV_into_scalar (cpu); - return; - } - break; + case 0x0B: do_vec_SMOV_into_scalar (cpu); return; + case 0x0F: do_vec_UMOV_into_scalar (cpu); return; case 0x00: case 0x08: @@ -5338,6 +5709,8 @@ do_vec_op1 (sim_cpu *cpu) case 0x16: do_vec_UZP (cpu); return; + case 0x0A: do_vec_TRN (cpu); return; + case 0x0E: case 0x1E: do_vec_ZIP (cpu); return; @@ -5347,20 +5720,22 @@ do_vec_op1 (sim_cpu *cpu) } } - switch (uimm (aarch64_get_instr (cpu), 13, 10)) + switch (INSTR (13, 10)) { case 0x6: do_vec_UZP (cpu); return; case 0xE: do_vec_ZIP (cpu); return; case 0xA: do_vec_TRN (cpu); return; - case 0xF: do_vec_UMOV (cpu); return; default: HALT_NYI; } } - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + switch (INSTR (15, 10)) { + case 0x02: do_vec_REV64 (cpu); return; + case 0x06: do_vec_REV16 (cpu); return; + case 0x07: - switch (uimm (aarch64_get_instr (cpu), 23, 21)) + switch (INSTR (23, 21)) { case 1: do_vec_AND (cpu); return; case 3: do_vec_BIC (cpu); return; @@ -5372,6 +5747,7 @@ do_vec_op1 (sim_cpu *cpu) case 0x08: do_vec_sub_long (cpu); return; case 0x0a: do_vec_XTN (cpu); return; case 0x11: do_vec_SSHL (cpu); return; + case 0x16: do_vec_CNT (cpu); return; case 0x19: do_vec_max (cpu); return; case 0x1B: do_vec_min (cpu); return; case 0x21: do_vec_add (cpu); return; @@ -5382,8 +5758,15 @@ do_vec_op1 (sim_cpu *cpu) case 0x33: do_vec_FMLA (cpu); return; case 0x35: do_vec_fadd (cpu); return; + case 0x1E: + switch (INSTR (20, 16)) + { + case 0x01: do_vec_FCVTL (cpu); return; + default: HALT_NYI; + } + case 0x2E: - switch (uimm (aarch64_get_instr (cpu), 20, 16)) + switch (INSTR (20, 16)) { case 0x00: do_vec_ABS (cpu); return; case 0x01: do_vec_FCVTZS (cpu); return; @@ -5426,79 +5809,92 @@ do_vec_xtl (sim_cpu *cpu) instr[9,5] = V source instr[4,0] = V dest. 
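/* REV64 reverses the elements within each 64-bit chunk and REV16 within
   each 16-bit chunk, which for byte lanes is simply "index XOR 7" and
   "index XOR 1" respectively.  Why the XOR trick works, in a standalone
   byte loop (dst must not alias src, which is why the hunks build the
   result in a temporary FRegister):  */

#include <stdint.h>

static void
rev64_u8 (uint8_t *dst, const uint8_t *src, unsigned nbytes)
{
  unsigned i;

  /* i ^ 7 maps 0..7 onto 7..0 inside every aligned 8-byte group.  */
  for (i = 0; i < nbytes; i++)
    dst[i ^ 7] = src[i];
}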
*/ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i, shift, bias = 0; NYI_assert (28, 22, 0x3C); NYI_assert (15, 10, 0x29); - switch (uimm (aarch64_get_instr (cpu), 30, 29)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (30, 29)) { case 2: /* SXTL2, SSHLL2. */ bias = 2; case 0: /* SXTL, SSHLL. */ - if (uimm (aarch64_get_instr (cpu), 21, 21)) + if (INSTR (21, 21)) { - shift = uimm (aarch64_get_instr (cpu), 20, 16); - aarch64_set_vec_s64 - (cpu, vd, 0, aarch64_get_vec_s32 (cpu, vs, bias) << shift); - aarch64_set_vec_s64 - (cpu, vd, 1, aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift); + int64_t val1, val2; + + shift = INSTR (20, 16); + /* Get the source values before setting the destination values + in case the source and destination are the same. */ + val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift; + val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift; + aarch64_set_vec_s64 (cpu, vd, 0, val1); + aarch64_set_vec_s64 (cpu, vd, 1, val2); } - else if (uimm (aarch64_get_instr (cpu), 20, 20)) + else if (INSTR (20, 20)) { - shift = uimm (aarch64_get_instr (cpu), 19, 16); + int32_t v[4]; + int32_t v1,v2,v3,v4; + + shift = INSTR (19, 16); bias *= 2; for (i = 0; i < 4; i++) - aarch64_set_vec_s32 - (cpu, vd, i, aarch64_get_vec_s16 (cpu, vs, i + bias) << shift); + v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift; + for (i = 0; i < 4; i++) + aarch64_set_vec_s32 (cpu, vd, i, v[i]); } else { + int16_t v[8]; NYI_assert (19, 19, 1); - shift = uimm (aarch64_get_instr (cpu), 18, 16); - bias *= 3; + shift = INSTR (18, 16); + bias *= 4; + for (i = 0; i < 8; i++) + v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift; for (i = 0; i < 8; i++) - aarch64_set_vec_s16 - (cpu, vd, i, aarch64_get_vec_s8 (cpu, vs, i + bias) << shift); + aarch64_set_vec_s16 (cpu, vd, i, v[i]); } return; case 3: /* UXTL2, USHLL2. */ bias = 2; case 1: /* UXTL, USHLL. */ - if (uimm (aarch64_get_instr (cpu), 21, 21)) + if (INSTR (21, 21)) { - shift = uimm (aarch64_get_instr (cpu), 20, 16); - aarch64_set_vec_u64 - (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vs, bias) << shift); - aarch64_set_vec_u64 - (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift); + uint64_t v1, v2; + shift = INSTR (20, 16); + v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift; + v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift; + aarch64_set_vec_u64 (cpu, vd, 0, v1); + aarch64_set_vec_u64 (cpu, vd, 1, v2); } - else if (uimm (aarch64_get_instr (cpu), 20, 20)) + else if (INSTR (20, 20)) { - shift = uimm (aarch64_get_instr (cpu), 19, 16); + uint32_t v[4]; + shift = INSTR (19, 16); bias *= 2; for (i = 0; i < 4; i++) - aarch64_set_vec_u32 - (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i + bias) << shift); + v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift; + for (i = 0; i < 4; i++) + aarch64_set_vec_u32 (cpu, vd, i, v[i]); } else { + uint16_t v[8]; NYI_assert (19, 19, 1); - shift = uimm (aarch64_get_instr (cpu), 18, 16); - bias *= 3; + shift = INSTR (18, 16); + bias *= 4; for (i = 0; i < 8; i++) - aarch64_set_vec_u16 - (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, i + bias) << shift); + v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift; + for (i = 0; i < 8; i++) + aarch64_set_vec_u16 (cpu, vd, i, v[i]); } return; - - default: - HALT_NYI; } } @@ -5514,17 +5910,18 @@ do_vec_SHL (sim_cpu *cpu) instr [4, 0] = Vd. 
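/* SXTL/SSHLL (and their unsigned twins) sign- or zero-extend each source
   lane to double width and then shift left; the "2" forms start from the
   upper half of the source, which is what the bias arithmetic selects.
   The rewrite also snapshots the lanes so vd == vs works, and corrects
   the byte-variant bias from *3 to *4 (SXTL2 on bytes starts at element
   8, i.e. the initial bias of 2 scaled by 4).  In miniature for 16-bit
   to 32-bit lanes:  */

#include <stdint.h>

static void
sshll_s16 (int32_t *vd, const int16_t *vs, unsigned shift, int high_half)
{
  int32_t v[4];
  unsigned i;
  unsigned bias = high_half ? 4 : 0;	/* SSHLL2 reads lanes 4..7.  */

  /* Read every source lane before writing: vd may alias vs.  */
  for (i = 0; i < 4; i++)
    v[i] = (int32_t) vs[i + bias] << shift;
  for (i = 0; i < 4; i++)
    vd[i] = v[i];
}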
*/ int shift; - int full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 23, 0x1E); NYI_assert (15, 10, 0x15); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { - shift = uimm (aarch64_get_instr (cpu), 21, 16) - 1; + shift = INSTR (21, 16); if (full == 0) HALT_UNALLOC; @@ -5538,9 +5935,9 @@ do_vec_SHL (sim_cpu *cpu) return; } - if (uimm (aarch64_get_instr (cpu), 21, 21)) + if (INSTR (21, 21)) { - shift = uimm (aarch64_get_instr (cpu), 20, 16) - 1; + shift = INSTR (20, 16); for (i = 0; i < (full ? 4 : 2); i++) { @@ -5551,9 +5948,9 @@ do_vec_SHL (sim_cpu *cpu) return; } - if (uimm (aarch64_get_instr (cpu), 20, 20)) + if (INSTR (20, 20)) { - shift = uimm (aarch64_get_instr (cpu), 19, 16) - 1; + shift = INSTR (19, 16); for (i = 0; i < (full ? 8 : 4); i++) { @@ -5564,10 +5961,10 @@ do_vec_SHL (sim_cpu *cpu) return; } - if (uimm (aarch64_get_instr (cpu), 19, 19) == 0) + if (INSTR (19, 19) == 0) HALT_UNALLOC; - shift = uimm (aarch64_get_instr (cpu), 18, 16) - 1; + shift = INSTR (18, 16); for (i = 0; i < (full ? 16 : 8); i++) { @@ -5582,25 +5979,26 @@ do_vec_SSHR_USHR (sim_cpu *cpu) /* instr [31] = 0 instr [30] = half(0)/full(1) instr [29] = signed(0)/unsigned(1) - instr [28,23] = 01 1110 + instr [28,23] = 0 1111 0 instr [22,16] = size and shift amount instr [15,10] = 0000 01 instr [9, 5] = Vs instr [4, 0] = Vd. */ - int shift; - int full = uimm (aarch64_get_instr (cpu), 30, 30); - int sign = uimm (aarch64_get_instr (cpu), 29, 29); - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + int sign = ! INSTR (29, 29); + unsigned shift = INSTR (22, 16); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (28, 23, 0x1E); NYI_assert (15, 10, 0x01); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { - shift = uimm (aarch64_get_instr (cpu), 21, 16); + shift = 128 - shift; if (full == 0) HALT_UNALLOC; @@ -5621,9 +6019,9 @@ do_vec_SSHR_USHR (sim_cpu *cpu) return; } - if (uimm (aarch64_get_instr (cpu), 21, 21)) + if (INSTR (21, 21)) { - shift = uimm (aarch64_get_instr (cpu), 20, 16); + shift = 64 - shift; if (sign) for (i = 0; i < (full ? 4 : 2); i++) @@ -5641,9 +6039,9 @@ do_vec_SSHR_USHR (sim_cpu *cpu) return; } - if (uimm (aarch64_get_instr (cpu), 20, 20)) + if (INSTR (20, 20)) { - shift = uimm (aarch64_get_instr (cpu), 19, 16); + shift = 32 - shift; if (sign) for (i = 0; i < (full ? 8 : 4); i++) @@ -5661,10 +6059,10 @@ do_vec_SSHR_USHR (sim_cpu *cpu) return; } - if (uimm (aarch64_get_instr (cpu), 19, 19) == 0) + if (INSTR (19, 19) == 0) HALT_UNALLOC; - shift = uimm (aarch64_get_instr (cpu), 18, 16); + shift = 16 - shift; if (sign) for (i = 0; i < (full ? 16 : 8); i++) @@ -5681,28 +6079,187 @@ do_vec_SSHR_USHR (sim_cpu *cpu) } static void -do_vec_op2 (sim_cpu *cpu) +do_vec_MUL_by_element (sim_cpu *cpu) { /* instr[31] = 0 instr[30] = half/full instr[29,24] = 00 1111 - instr[23] = ? 
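/* The SSHR/USHR rewrite decodes the shift amount the way the architecture
   defines it: bits [22,16] hold immh:immb, and the real shift is
   2 * esize - immh:immb, which is exactly what the "128 - shift",
   "64 - shift", "32 - shift" and "16 - shift" lines compute for 64-, 32-,
   16- and 8-bit elements.  As a tiny decode helper (hypothetical):  */

static unsigned
ushr_shift_amount (unsigned imm7, unsigned esize)
{
  /* imm7 lies in [esize, 2*esize - 1], so the result is in [1, esize];
     e.g. for esize == 64, imm7 == 120 decodes to a shift of 8.  */
  return 2 * esize - imm7;
}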
- instr[22,16] = element size & index + instr[23,22] = size + instr[21] = L + instr[20] = M + instr[19,16] = m + instr[15,12] = 1000 + instr[11] = H + instr[10] = 0 + instr[9,5] = Vn + instr[4,0] = Vd */ + + unsigned full = INSTR (30, 30); + unsigned L = INSTR (21, 21); + unsigned H = INSTR (11, 11); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned size = INSTR (23, 22); + unsigned index; + unsigned vm; + unsigned e; + + NYI_assert (29, 24, 0x0F); + NYI_assert (15, 12, 0x8); + NYI_assert (10, 10, 0); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (size) + { + case 1: + { + /* 16 bit products. */ + uint16_t product; + uint16_t element1; + uint16_t element2; + + index = (H << 2) | (L << 1) | INSTR (20, 20); + vm = INSTR (19, 16); + element2 = aarch64_get_vec_u16 (cpu, vm, index); + + for (e = 0; e < (full ? 8 : 4); e ++) + { + element1 = aarch64_get_vec_u16 (cpu, vn, e); + product = element1 * element2; + aarch64_set_vec_u16 (cpu, vd, e, product); + } + } + break; + + case 2: + { + /* 32 bit products. */ + uint32_t product; + uint32_t element1; + uint32_t element2; + + index = (H << 1) | L; + vm = INSTR (20, 16); + element2 = aarch64_get_vec_u32 (cpu, vm, index); + + for (e = 0; e < (full ? 4 : 2); e ++) + { + element1 = aarch64_get_vec_u32 (cpu, vn, e); + product = element1 * element2; + aarch64_set_vec_u32 (cpu, vd, e, product); + } + } + break; + + default: + HALT_UNALLOC; + } +} + +static void +do_FMLA_by_element (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half/full + instr[29,23] = 00 1111 1 + instr[22] = size + instr[21] = L + instr[20,16] = m + instr[15,12] = 0001 + instr[11] = H + instr[10] = 0 + instr[9,5] = Vn + instr[4,0] = Vd */ + + unsigned full = INSTR (30, 30); + unsigned size = INSTR (22, 22); + unsigned L = INSTR (21, 21); + unsigned vm = INSTR (20, 16); + unsigned H = INSTR (11, 11); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned e; + + NYI_assert (29, 23, 0x1F); + NYI_assert (15, 12, 0x1); + NYI_assert (10, 10, 0); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (size) + { + double element1, element2; + + if (! full || L) + HALT_UNALLOC; + + element2 = aarch64_get_vec_double (cpu, vm, H); + + for (e = 0; e < 2; e++) + { + element1 = aarch64_get_vec_double (cpu, vn, e); + element1 *= element2; + element1 += aarch64_get_vec_double (cpu, vd, e); + aarch64_set_vec_double (cpu, vd, e, element1); + } + } + else + { + float element1; + float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L); + + for (e = 0; e < (full ? 4 : 2); e++) + { + element1 = aarch64_get_vec_float (cpu, vn, e); + element1 *= element2; + element1 += aarch64_get_vec_float (cpu, vd, e); + aarch64_set_vec_float (cpu, vd, e, element1); + } + } +} + +static void +do_vec_op2 (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half/full + instr[29,24] = 00 1111 + instr[23] = ? 
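/* MUL (by element) multiplies every lane of Vn by one selected lane of
   Vm, with the lane index assembled from the scattered H, L and M bits;
   FMLA (by element) does the same with an accumulate folded in.  The core
   broadcast-multiply, for the 32-bit case with the index already
   assembled as (H << 1) | L (hypothetical helper):  */

#include <stdint.h>

static void
mul_by_element_u32 (uint32_t *vd, const uint32_t *vn, const uint32_t *vm,
		    unsigned index, unsigned nlanes)
{
  uint32_t element2 = vm[index];	/* The single broadcast operand.  */
  unsigned e;

  for (e = 0; e < nlanes; e++)
    vd[e] = vn[e] * element2;
}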
+ instr[22,16] = element size & index instr[15,10] = sub-opcode instr[9,5] = Vm - instr[4.0] = Vd */ + instr[4,0] = Vd */ NYI_assert (29, 24, 0x0F); - if (uimm (aarch64_get_instr (cpu), 23, 23) != 0) - HALT_NYI; + if (INSTR (23, 23) != 0) + { + switch (INSTR (15, 10)) + { + case 0x04: + case 0x06: + do_FMLA_by_element (cpu); + return; + + case 0x20: + case 0x22: + do_vec_MUL_by_element (cpu); + return; - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + default: + HALT_NYI; + } + } + else { - case 0x01: do_vec_SSHR_USHR (cpu); return; - case 0x15: do_vec_SHL (cpu); return; - case 0x29: do_vec_xtl (cpu); return; - default: HALT_NYI; + switch (INSTR (15, 10)) + { + case 0x01: do_vec_SSHR_USHR (cpu); return; + case 0x15: do_vec_SHL (cpu); return; + case 0x20: + case 0x22: do_vec_MUL_by_element (cpu); return; + case 0x29: do_vec_xtl (cpu); return; + default: HALT_NYI; + } } } @@ -5717,15 +6274,16 @@ do_vec_neg (sim_cpu *cpu) instr[9,5] = Vs instr[4,0] = Vd */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 24, 0x2E); NYI_assert (21, 10, 0x82E); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -5748,9 +6306,6 @@ do_vec_neg (sim_cpu *cpu) for (i = 0; i < 2; i++) aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i)); return; - - default: - HALT_UNREACHABLE; } } @@ -5765,15 +6320,16 @@ do_vec_sqrt (sim_cpu *cpu) instr[9,5] = Vs instr[4,0] = Vd. */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + int full = INSTR (30, 30); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 23, 0x5B); NYI_assert (21, 10, 0x87E); - if (uimm (aarch64_get_instr (cpu), 22, 22) == 0) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22) == 0) for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_float (cpu, vd, i, sqrtf (aarch64_get_vec_float (cpu, vs, i))); @@ -5799,16 +6355,17 @@ do_vec_mls_indexed (sim_cpu *cpu) instr[9,5] = Vs instr[4,0] = Vd. */ - int full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); + int full = INSTR (30, 30); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned vm = INSTR (20, 16); unsigned i; NYI_assert (15, 12, 4); NYI_assert (10, 10, 0); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 1: { @@ -5818,8 +6375,7 @@ do_vec_mls_indexed (sim_cpu *cpu) if (vm > 15) HALT_NYI; - elem = (uimm (aarch64_get_instr (cpu), 21, 20) << 1) - | uimm (aarch64_get_instr (cpu), 11, 11); + elem = (INSTR (21, 20) << 1) | INSTR (11, 11); val = aarch64_get_vec_u16 (cpu, vm, elem); for (i = 0; i < (full ? 
8 : 4); i++) @@ -5831,8 +6387,7 @@ do_vec_mls_indexed (sim_cpu *cpu) case 2: { - unsigned elem = (uimm (aarch64_get_instr (cpu), 21, 21) << 1) - | uimm (aarch64_get_instr (cpu), 11, 11); + unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11); uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem); for (i = 0; i < (full ? 4 : 2); i++) @@ -5862,17 +6417,18 @@ do_vec_SUB (sim_cpu *cpu) instr [9, 5] = Vn instr [4, 0] = Vd. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 24, 0x2E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x21); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) @@ -5904,9 +6460,6 @@ do_vec_SUB (sim_cpu *cpu) aarch64_get_vec_s64 (cpu, vn, i) - aarch64_get_vec_s64 (cpu, vm, i)); return; - - default: - HALT_UNREACHABLE; } } @@ -5923,40 +6476,41 @@ do_vec_MLS (sim_cpu *cpu) instr [9, 5] = Vn instr [4, 0] = Vd. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 24, 0x2E); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x25); - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) { case 0: for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, - (aarch64_get_vec_u8 (cpu, vn, i) - * aarch64_get_vec_u8 (cpu, vm, i)) - - aarch64_get_vec_u8 (cpu, vd, i)); + aarch64_get_vec_u8 (cpu, vd, i) + - (aarch64_get_vec_u8 (cpu, vn, i) + * aarch64_get_vec_u8 (cpu, vm, i))); return; case 1: for (i = 0; i < (full ? 8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, - (aarch64_get_vec_u16 (cpu, vn, i) - * aarch64_get_vec_u16 (cpu, vm, i)) - - aarch64_get_vec_u16 (cpu, vd, i)); + aarch64_get_vec_u16 (cpu, vd, i) + - (aarch64_get_vec_u16 (cpu, vn, i) + * aarch64_get_vec_u16 (cpu, vm, i))); return; case 2: for (i = 0; i < (full ? 4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, - (aarch64_get_vec_u32 (cpu, vn, i) - * aarch64_get_vec_u32 (cpu, vm, i)) - - aarch64_get_vec_u32 (cpu, vd, i)); + aarch64_get_vec_u32 (cpu, vd, i) + - (aarch64_get_vec_u32 (cpu, vn, i) + * aarch64_get_vec_u32 (cpu, vm, i))); return; default: @@ -5977,17 +6531,18 @@ do_vec_FDIV (sim_cpu *cpu) instr [9, 5] = Vn instr [4, 0] = Vd. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 23, 0x5C); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x3F); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! 
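/* The MLS rewrite above is a genuine fix, not just a cleanup: the
   instruction computes Vd = Vd - Vn * Vm (the product subtracted from the
   accumulator), whereas the old code computed Vn * Vm - Vd.  One lane of
   the corrected operation:  */

#include <stdint.h>

static uint32_t
mls_lane_u32 (uint32_t acc, uint32_t n, uint32_t m)
{
  /* Accumulator minus product, wrapping modulo 2^32.  */
  return acc - n * m;
}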
full) HALT_UNALLOC; @@ -6017,17 +6572,18 @@ do_vec_FMUL (sim_cpu *cpu) instr [9, 5] = Vn instr [4, 0] = Vd. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; NYI_assert (29, 23, 0x5C); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x37); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! full) HALT_UNALLOC; @@ -6057,39 +6613,55 @@ do_vec_FADDP (sim_cpu *cpu) instr [9, 5] = Vn instr [4, 0] = Vd. */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned full = INSTR (30, 30); + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); NYI_assert (29, 23, 0x5C); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x35); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { + /* Extract values before adding them incase vd == vn/vm. */ + double tmp1 = aarch64_get_vec_double (cpu, vn, 0); + double tmp2 = aarch64_get_vec_double (cpu, vn, 1); + double tmp3 = aarch64_get_vec_double (cpu, vm, 0); + double tmp4 = aarch64_get_vec_double (cpu, vm, 1); + if (! full) HALT_UNALLOC; - aarch64_set_vec_double (cpu, vd, 0, aarch64_get_vec_double (cpu, vn, 0) - + aarch64_get_vec_double (cpu, vn, 1)); - aarch64_set_vec_double (cpu, vd, 1, aarch64_get_vec_double (cpu, vm, 0) - + aarch64_get_vec_double (cpu, vm, 1)); + aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2); + aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4); } else { - aarch64_set_vec_float (cpu, vd, 0, aarch64_get_vec_float (cpu, vn, 0) - + aarch64_get_vec_float (cpu, vn, 1)); - if (full) - aarch64_set_vec_float (cpu, vd, 1, aarch64_get_vec_float (cpu, vn, 2) - + aarch64_get_vec_float (cpu, vn, 3)); - aarch64_set_vec_float (cpu, vd, full ? 2 : 1, - aarch64_get_vec_float (cpu, vm, 0) - + aarch64_get_vec_float (cpu, vm, 1)); + /* Extract values before adding them incase vd == vn/vm. */ + float tmp1 = aarch64_get_vec_float (cpu, vn, 0); + float tmp2 = aarch64_get_vec_float (cpu, vn, 1); + float tmp5 = aarch64_get_vec_float (cpu, vm, 0); + float tmp6 = aarch64_get_vec_float (cpu, vm, 1); + if (full) - aarch64_set_vec_float (cpu, vd, 3, - aarch64_get_vec_float (cpu, vm, 2) - + aarch64_get_vec_float (cpu, vm, 3)); + { + float tmp3 = aarch64_get_vec_float (cpu, vn, 2); + float tmp4 = aarch64_get_vec_float (cpu, vn, 3); + float tmp7 = aarch64_get_vec_float (cpu, vm, 2); + float tmp8 = aarch64_get_vec_float (cpu, vm, 3); + + aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2); + aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4); + aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6); + aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8); + } + else + { + aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2); + aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6); + } } } @@ -6104,15 +6676,16 @@ do_vec_FSQRT (sim_cpu *cpu) instr[9,5] = Vsrc instr[4,0] = Vdest. 
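/* FADDP has the same aliasing hazard as ADDP: pairwise sums of Vn fill
   the low half of Vd and pairwise sums of Vm the high half, so every
   input must be read before any output lane is written.  The hunk pulls
   all the reads into temporaries; the four-lane single-precision case in
   miniature:  */

static void
faddp_f32 (float *vd, const float *vn, const float *vm)
{
  /* Read everything first: vd may alias vn or vm.  */
  float a = vn[0] + vn[1];
  float b = vn[2] + vn[3];
  float c = vm[0] + vm[1];
  float d = vm[2] + vm[3];

  vd[0] = a;
  vd[1] = b;
  vd[2] = c;
  vd[3] = d;
}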
*/ - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); int i; NYI_assert (29, 23, 0x5D); NYI_assert (21, 10, 0x87E); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! full) HALT_UNALLOC; @@ -6140,15 +6713,16 @@ do_vec_FNEG (sim_cpu *cpu) instr[9,5] = Vsrc instr[4,0] = Vdest. */ - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned full = INSTR (30, 30); int i; NYI_assert (29, 23, 0x5D); NYI_assert (21, 10, 0x83E); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { if (! full) HALT_UNALLOC; @@ -6170,23 +6744,85 @@ do_vec_NOT (sim_cpu *cpu) { /* instr[31] = 0 instr[30] = half (0)/full (1) - instr[29,21] = 10 1110 001 - instr[20,16] = 0 0000 - instr[15,10] = 0101 10 + instr[29,10] = 10 1110 0010 0000 0101 10 instr[9,5] = Vn instr[4.0] = Vd. */ - unsigned vn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned i; - int full = uimm (aarch64_get_instr (cpu), 30, 30); + int full = INSTR (30, 30); NYI_assert (29, 10, 0xB8816); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i)); } +static unsigned int +clz (uint64_t val, unsigned size) +{ + uint64_t mask = 1; + int count; + + mask <<= (size - 1); + count = 0; + do + { + if (val & mask) + break; + mask >>= 1; + count ++; + } + while (mask); + + return count; +} + +static void +do_vec_CLZ (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = half (0)/full (1) + instr[29,24] = 10 1110 + instr[23,22] = size + instr[21,10] = 10 0000 0100 10 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned i; + int full = INSTR (30,30); + + NYI_assert (29, 24, 0x2E); + NYI_assert (21, 10, 0x812); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (23, 22)) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8)); + break; + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16)); + break; + case 2: + for (i = 0; i < (full ? 4 : 2); i++) + aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32)); + break; + case 3: + if (! full) + HALT_UNALLOC; + aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64)); + aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64)); + break; + } +} + static void do_vec_MOV_element (sim_cpu *cpu) { @@ -6198,8 +6834,8 @@ do_vec_MOV_element (sim_cpu *cpu) instr[9,5] = Vs instr[4.0] = Vd. 
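/* The new clz helper scans from the top bit of the given width downwards,
   so clz (0, size) returns size and a set top bit returns 0, matching the
   architectural CLZ result.  A few spot checks of that contract
   (hypothetical test harness, not part of the simulator):  */

#include <assert.h>
#include <stdint.h>

static void
clz_spot_checks (unsigned int (*clz) (uint64_t, unsigned))
{
  assert (clz (0x80, 8) == 0);		/* Top bit set.  */
  assert (clz (0x01, 8) == 7);
  assert (clz (0x00F0, 16) == 8);
  assert (clz (0, 32) == 32);		/* All-zero input counts every bit.  */
}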
*/ - unsigned vs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned vs = INSTR (9, 5); + unsigned vd = INSTR (4, 0); unsigned src_index; unsigned dst_index; @@ -6207,29 +6843,30 @@ do_vec_MOV_element (sim_cpu *cpu) NYI_assert (15, 15, 0); NYI_assert (10, 10, 1); - if (uimm (aarch64_get_instr (cpu), 16, 16)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (16, 16)) { /* Move a byte. */ - src_index = uimm (aarch64_get_instr (cpu), 14, 11); - dst_index = uimm (aarch64_get_instr (cpu), 20, 17); + src_index = INSTR (14, 11); + dst_index = INSTR (20, 17); aarch64_set_vec_u8 (cpu, vd, dst_index, aarch64_get_vec_u8 (cpu, vs, src_index)); } - else if (uimm (aarch64_get_instr (cpu), 17, 17)) + else if (INSTR (17, 17)) { /* Move 16-bits. */ NYI_assert (11, 11, 0); - src_index = uimm (aarch64_get_instr (cpu), 14, 12); - dst_index = uimm (aarch64_get_instr (cpu), 20, 18); + src_index = INSTR (14, 12); + dst_index = INSTR (20, 18); aarch64_set_vec_u16 (cpu, vd, dst_index, aarch64_get_vec_u16 (cpu, vs, src_index)); } - else if (uimm (aarch64_get_instr (cpu), 18, 18)) + else if (INSTR (18, 18)) { /* Move 32-bits. */ NYI_assert (12, 11, 0); - src_index = uimm (aarch64_get_instr (cpu), 14, 13); - dst_index = uimm (aarch64_get_instr (cpu), 20, 19); + src_index = INSTR (14, 13); + dst_index = INSTR (20, 19); aarch64_set_vec_u32 (cpu, vd, dst_index, aarch64_get_vec_u32 (cpu, vs, src_index)); } @@ -6237,74 +6874,152 @@ do_vec_MOV_element (sim_cpu *cpu) { NYI_assert (19, 19, 1); NYI_assert (13, 11, 0); - src_index = uimm (aarch64_get_instr (cpu), 14, 14); - dst_index = uimm (aarch64_get_instr (cpu), 20, 20); + src_index = INSTR (14, 14); + dst_index = INSTR (20, 20); aarch64_set_vec_u64 (cpu, vd, dst_index, aarch64_get_vec_u64 (cpu, vs, src_index)); } } +static void +do_vec_REV32 (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half + instr[29,24] = 10 1110 + instr[23,22] = size + instr[21,10] = 10 0000 0000 10 + instr[9,5] = Rn + instr[4,0] = Rd. */ + + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned size = INSTR (23, 22); + unsigned full = INSTR (30, 30); + unsigned i; + FRegister val; + + NYI_assert (29, 24, 0x2E); + NYI_assert (21, 10, 0x802); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (size) + { + case 0: + for (i = 0; i < (full ? 16 : 8); i++) + val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i); + break; + + case 1: + for (i = 0; i < (full ? 8 : 4); i++) + val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i); + break; + + default: + HALT_UNALLOC; + } + + aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); + if (full) + aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); +} + +static void +do_vec_EXT (sim_cpu *cpu) +{ + /* instr[31] = 0 + instr[30] = full/half + instr[29,21] = 10 1110 000 + instr[20,16] = Vm + instr[15] = 0 + instr[14,11] = source index + instr[10] = 0 + instr[9,5] = Vn + instr[4.0] = Vd. */ + + unsigned vm = INSTR (20, 16); + unsigned vn = INSTR (9, 5); + unsigned vd = INSTR (4, 0); + unsigned src_index = INSTR (14, 11); + unsigned full = INSTR (30, 30); + unsigned i; + unsigned j; + FRegister val; + + NYI_assert (31, 21, 0x370); + NYI_assert (15, 15, 0); + NYI_assert (10, 10, 0); + + if (!full && (src_index & 0x8)) + HALT_UNALLOC; + + j = 0; + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + for (i = src_index; i < (full ? 
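/* INS/MOV (element) encodes the element size in the position of the
   lowest set bit of imm5 (instr[20,16]): bit 16 set means bytes, bit 17
   halfwords, bit 18 words, bit 19 doublewords, and the remaining high
   bits of imm5 and imm4 supply the two indices.  The chain of INSTR
   tests above walks those bits in order; the equivalent size decode in
   one loop (hypothetical helper):  */

static unsigned
ins_element_size (unsigned imm5)
{
  unsigned lg2 = 0;	/* log2 of the element size in bytes.  */

  while (lg2 < 4 && !(imm5 & (1u << lg2)))
    lg2++;
  /* 0: byte, 1: half, 2: word, 3: doubleword; 4 would be the
     unallocated all-clear encoding.  */
  return lg2;
}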
16 : 8); i++) + val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i); + for (i = 0; i < src_index; i++) + val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i); + + aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]); + if (full) + aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]); +} + static void dexAdvSIMD0 (sim_cpu *cpu) { /* instr [28,25] = 0 111. */ - if ( uimm (aarch64_get_instr (cpu), 15, 10) == 0x07 - && (uimm (aarch64_get_instr (cpu), 9, 5) == - uimm (aarch64_get_instr (cpu), 20, 16))) + if ( INSTR (15, 10) == 0x07 + && (INSTR (9, 5) == + INSTR (20, 16))) { - if (uimm (aarch64_get_instr (cpu), 31, 21) == 0x075 - || uimm (aarch64_get_instr (cpu), 31, 21) == 0x275) + if (INSTR (31, 21) == 0x075 + || INSTR (31, 21) == 0x275) { do_vec_MOV_whole_vector (cpu); return; } } - if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x1E0) + if (INSTR (29, 19) == 0x1E0) { do_vec_MOV_immediate (cpu); return; } - if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x5E0) + if (INSTR (29, 19) == 0x5E0) { do_vec_MVNI (cpu); return; } - if (uimm (aarch64_get_instr (cpu), 29, 19) == 0x1C0 - || uimm (aarch64_get_instr (cpu), 29, 19) == 0x1C1) + if (INSTR (29, 19) == 0x1C0 + || INSTR (29, 19) == 0x1C1) { - if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x03) + if (INSTR (15, 10) == 0x03) { do_vec_DUP_scalar_into_vector (cpu); return; } } - switch (uimm (aarch64_get_instr (cpu), 29, 24)) + switch (INSTR (29, 24)) { case 0x0E: do_vec_op1 (cpu); return; case 0x0F: do_vec_op2 (cpu); return; - case 0x2f: - switch (uimm (aarch64_get_instr (cpu), 15, 10)) - { - case 0x01: do_vec_SSHR_USHR (cpu); return; - case 0x10: - case 0x12: do_vec_mls_indexed (cpu); return; - case 0x29: do_vec_xtl (cpu); return; - default: - HALT_NYI; - } - case 0x2E: - if (uimm (aarch64_get_instr (cpu), 21, 21) == 1) + if (INSTR (21, 21) == 1) { - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + switch (INSTR (15, 10)) { + case 0x02: + do_vec_REV32 (cpu); + return; + case 0x07: - switch (uimm (aarch64_get_instr (cpu), 23, 22)) + switch (INSTR (23, 22)) { case 0: do_vec_EOR (cpu); return; case 1: do_vec_BSL (cpu); return; @@ -6315,6 +7030,7 @@ dexAdvSIMD0 (sim_cpu *cpu) case 0x08: do_vec_sub_long (cpu); return; case 0x11: do_vec_USHL (cpu); return; + case 0x12: do_vec_CLZ (cpu); return; case 0x16: do_vec_NOT (cpu); return; case 0x19: do_vec_max (cpu); return; case 0x1B: do_vec_min (cpu); return; @@ -6326,7 +7042,7 @@ dexAdvSIMD0 (sim_cpu *cpu) case 0x3F: do_vec_FDIV (cpu); return; case 0x3E: - switch (uimm (aarch64_get_instr (cpu), 20, 16)) + switch (INSTR (20, 16)) { case 0x00: do_vec_FNEG (cpu); return; case 0x01: do_vec_FSQRT (cpu); return; @@ -6345,22 +7061,26 @@ dexAdvSIMD0 (sim_cpu *cpu) case 0x3A: do_vec_compare (cpu); return; - default: break; + default: + break; } } - if (uimm (aarch64_get_instr (cpu), 31, 21) == 0x370) + if (INSTR (31, 21) == 0x370) { - do_vec_MOV_element (cpu); + if (INSTR (10, 10)) + do_vec_MOV_element (cpu); + else + do_vec_EXT (cpu); return; } - switch (uimm (aarch64_get_instr (cpu), 21, 10)) + switch (INSTR (21, 10)) { case 0x82E: do_vec_neg (cpu); return; case 0x87E: do_vec_sqrt (cpu); return; default: - if (uimm (aarch64_get_instr (cpu), 15, 10) == 0x30) + if (INSTR (15, 10) == 0x30) { do_vec_mull (cpu); return; @@ -6369,6 +7089,17 @@ dexAdvSIMD0 (sim_cpu *cpu) } break; + case 0x2f: + switch (INSTR (15, 10)) + { + case 0x01: do_vec_SSHR_USHR (cpu); return; + case 0x10: + case 0x12: do_vec_mls_indexed (cpu); return; + case 0x29: do_vec_xtl (cpu); return; + default: + HALT_NYI; + } + default: break; } @@ -6382,11 +7113,12 @@ dexAdvSIMD0 (sim_cpu 
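/* EXT forms a sliding byte window over the concatenated register pair:
   the result is vn[index..nbytes-1] followed by vm[0..index-1], built in
   a temporary so that Vd may alias either source.  As a plain byte loop
   (nbytes is 16 for the full form, 8 for the half form, and index must
   stay below nbytes):  */

#include <stdint.h>

static void
ext_bytes (uint8_t *vd, const uint8_t *vn, const uint8_t *vm,
	   unsigned index, unsigned nbytes)
{
  uint8_t val[16];
  unsigned i, j = 0;

  for (i = index; i < nbytes; i++)
    val[j++] = vn[i];		/* Tail of Vn first ...  */
  for (i = 0; i < index; i++)
    val[j++] = vm[i];		/* ... then the head of Vm.  */
  for (i = 0; i < nbytes; i++)
    vd[i] = val[i];
}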
*cpu) static void fmadds (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) + aarch64_get_FP_float (cpu, sn) * aarch64_get_FP_float (cpu, sm)); @@ -6396,11 +7128,12 @@ fmadds (sim_cpu *cpu) static void fmaddd (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) + aarch64_get_FP_double (cpu, sn) * aarch64_get_FP_double (cpu, sm)); @@ -6410,11 +7143,12 @@ fmaddd (sim_cpu *cpu) static void fmsubs (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) - aarch64_get_FP_float (cpu, sn) * aarch64_get_FP_float (cpu, sm)); @@ -6424,11 +7158,12 @@ fmsubs (sim_cpu *cpu) static void fmsubd (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) - aarch64_get_FP_double (cpu, sn) * aarch64_get_FP_double (cpu, sm)); @@ -6438,11 +7173,12 @@ fmsubd (sim_cpu *cpu) static void fnmadds (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) + (- aarch64_get_FP_float (cpu, sn)) * aarch64_get_FP_float (cpu, sm)); @@ -6452,11 +7188,12 @@ fnmadds (sim_cpu *cpu) static void fnmaddd (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, 
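/* The scalar fused forms being converted here differ only in sign
   placement: FMADD d = a + n*m, FMSUB d = a - n*m, FNMADD d = -a - n*m,
   FNMSUB d = -a + n*m, each in single and double precision.  Note the
   simulator evaluates them as a separate multiply and add, i.e. with two
   roundings; a genuinely fused variant would go through fma, shown here
   for FNMADD as a sketch of the alternative, not what the simulator
   does:  */

#include <math.h>

static double
fnmaddd_fused (double a, double n, double m)
{
  /* fma (x, y, z) computes x*y + z with a single rounding;
     FNMADD is (-n)*m + (-a).  */
  return fma (- n, m, - a);
}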
sd, - aarch64_get_FP_double (cpu, sa) + (- aarch64_get_FP_double (cpu, sn)) * aarch64_get_FP_double (cpu, sm)); @@ -6466,11 +7203,12 @@ fnmaddd (sim_cpu *cpu) static void fnmsubs (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) + aarch64_get_FP_float (cpu, sn) * aarch64_get_FP_float (cpu, sm)); @@ -6480,11 +7218,12 @@ fnmsubs (sim_cpu *cpu) static void fnmsubd (sim_cpu *cpu) { - unsigned sa = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sa = INSTR (14, 10); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) + aarch64_get_FP_double (cpu, sn) * aarch64_get_FP_double (cpu, sm)); @@ -6502,11 +7241,9 @@ dexSimpleFPDataProc3Source (sim_cpu *cpu) instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */ - uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) - | uimm (aarch64_get_instr (cpu), 29, 29); + uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); /* dispatch on combined type:o1:o2. */ - uint32_t dispatch = (uimm (aarch64_get_instr (cpu), 23, 21) << 1) - | uimm (aarch64_get_instr (cpu), 15, 15); + uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15); if (M_S != 0) HALT_UNALLOC; @@ -6536,7 +7273,58 @@ dexSimpleFPFixedConvert (sim_cpu *cpu) static void dexSimpleFPCondCompare (sim_cpu *cpu) { - HALT_NYI; + /* instr [31,23] = 0001 1110 0 + instr [22] = type + instr [21] = 1 + instr [20,16] = Rm + instr [15,12] = condition + instr [11,10] = 01 + instr [9,5] = Rn + instr [4] = 0 + instr [3,0] = nzcv */ + + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + + NYI_assert (31, 23, 0x3C); + NYI_assert (11, 10, 0x1); + NYI_assert (4, 4, 0); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (! testConditionCode (cpu, INSTR (15, 12))) + { + aarch64_set_CPSR (cpu, INSTR (3, 0)); + return; + } + + if (INSTR (22, 22)) + { + /* Double precision. */ + double val1 = aarch64_get_vec_double (cpu, rn, 0); + double val2 = aarch64_get_vec_double (cpu, rm, 0); + + /* FIXME: Check for NaNs. */ + if (val1 == val2) + aarch64_set_CPSR (cpu, (Z | C)); + else if (val1 < val2) + aarch64_set_CPSR (cpu, N); + else /* val1 > val2 */ + aarch64_set_CPSR (cpu, C); + } + else + { + /* Single precision. */ + float val1 = aarch64_get_vec_float (cpu, rn, 0); + float val2 = aarch64_get_vec_float (cpu, rm, 0); + + /* FIXME: Check for NaNs. */ + if (val1 == val2) + aarch64_set_CPSR (cpu, (Z | C)); + else if (val1 < val2) + aarch64_set_CPSR (cpu, N); + else /* val1 > val2 */ + aarch64_set_CPSR (cpu, C); + } } /* 2 sources. 
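/* The new dexSimpleFPCondCompare body implements FCCMP: when the
   condition holds it performs an FCMP-style compare, otherwise it loads
   the literal nzcv field from instr[3,0].  The flag pattern matches FCMP:
   equal sets Z|C, less sets N, greater sets C; unordered inputs should
   set C|V, which the FIXMEs above leave unimplemented.  A sketch of the
   compare with hypothetical flag constants standing in for the
   simulator's N/Z/C/V:  */

#define FLAG_N 0x8
#define FLAG_Z 0x4
#define FLAG_C 0x2
#define FLAG_V 0x1

static unsigned
fcmp_flags (double val1, double val2)
{
  if (val1 != val1 || val2 != val2)
    return FLAG_C | FLAG_V;	/* Unordered: at least one NaN.  */
  if (val1 == val2)
    return FLAG_Z | FLAG_C;
  if (val1 < val2)
    return FLAG_N;
  return FLAG_C;		/* val1 > val2.  */
}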
*/ @@ -6545,10 +7333,11 @@ dexSimpleFPCondCompare (sim_cpu *cpu) static void fadds (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) + aarch64_get_FP_float (cpu, sm)); } @@ -6557,10 +7346,11 @@ fadds (sim_cpu *cpu) static void faddd (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) + aarch64_get_FP_double (cpu, sm)); } @@ -6569,10 +7359,11 @@ faddd (sim_cpu *cpu) static void fdivs (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) / aarch64_get_FP_float (cpu, sm)); } @@ -6581,10 +7372,11 @@ fdivs (sim_cpu *cpu) static void fdivd (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) / aarch64_get_FP_double (cpu, sm)); } @@ -6593,10 +7385,11 @@ fdivd (sim_cpu *cpu) static void fmuls (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) * aarch64_get_FP_float (cpu, sm)); } @@ -6605,10 +7398,11 @@ fmuls (sim_cpu *cpu) static void fmuld (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) * aarch64_get_FP_double (cpu, sm)); } @@ -6617,10 +7411,11 @@ fmuld (sim_cpu *cpu) static void fnmuls (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn) * aarch64_get_FP_float (cpu, sm))); } @@ -6629,10 +7424,11 @@ fnmuls (sim_cpu *cpu) static void fnmuld (sim_cpu 
*cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn) * aarch64_get_FP_double (cpu, sm))); } @@ -6641,10 +7437,11 @@ fnmuld (sim_cpu *cpu) static void fsubs (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) - aarch64_get_FP_float (cpu, sm)); } @@ -6653,10 +7450,11 @@ fsubs (sim_cpu *cpu) static void fsubd (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) - aarch64_get_FP_double (cpu, sm)); } @@ -6672,14 +7470,15 @@ do_FMINNM (sim_cpu *cpu) instr[9,5] = Sn instr[4,0] = Cpu */ - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); NYI_assert (31, 23, 0x03C); NYI_assert (15, 10, 0x1E); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) aarch64_set_FP_double (cpu, sd, dminnm (aarch64_get_FP_double (cpu, sn), aarch64_get_FP_double (cpu, sm))); @@ -6700,14 +7499,15 @@ do_FMAXNM (sim_cpu *cpu) instr[9,5] = Sn instr[4,0] = Cpu */ - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); NYI_assert (31, 23, 0x03C); NYI_assert (15, 10, 0x1A); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) aarch64_set_FP_double (cpu, sd, dmaxnm (aarch64_get_FP_double (cpu, sn), aarch64_get_FP_double (cpu, sm))); @@ -6737,11 +7537,10 @@ dexSimpleFPDataProc2Source (sim_cpu *cpu) instr[9,5] = Vn instr[4,0] = Vd */ - uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) - | uimm (aarch64_get_instr (cpu), 29, 29); - uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22); + uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); + uint32_t type = INSTR (23, 22); /* Dispatch on opcode. 
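   Per the AArch64 ISA the opcode in bits [15,12] selects:
     0000 ==> FMUL,   0001 ==> FDIV,   0010 ==> FADD,   0011 ==> FSUB,
     0100 ==> FMAX,   0101 ==> FMIN,   0110 ==> FMAXNM, 0111 ==> FMINNM,
     1000 ==> FNMUL,  other values ==> UNALLOC.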
*/ - uint32_t dispatch = uimm (aarch64_get_instr (cpu), 15, 12); + uint32_t dispatch = INSTR (15, 12); if (type > 1) HALT_UNALLOC; @@ -6800,53 +7599,59 @@ dexSimpleFPCondSelect (sim_cpu *cpu) instr[11,10] = 11 instr[9,5] = Sn instr[4,0] = Cpu */ - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); - uint32_t set = testConditionCode (cpu, uimm (aarch64_get_instr (cpu), 15, 12)); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); + unsigned sd = INSTR ( 4, 0); + uint32_t set = testConditionCode (cpu, INSTR (15, 12)); NYI_assert (31, 23, 0x03C); NYI_assert (11, 10, 0x3); - if (uimm (aarch64_get_instr (cpu), 22, 22)) - aarch64_set_FP_double (cpu, sd, set ? sn : sm); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) + aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn) + : aarch64_get_FP_double (cpu, sm))); else - aarch64_set_FP_float (cpu, sd, set ? sn : sm); + aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn) + : aarch64_get_FP_float (cpu, sm))); } /* Store 32 bit unscaled signed 9 bit. */ static void fsturs (sim_cpu *cpu, int32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); - aarch64_set_mem_float (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset, - aarch64_get_FP_float (cpu, rn)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset, + aarch64_get_vec_u32 (cpu, st, 0)); } /* Store 64 bit unscaled signed 9 bit. */ static void fsturd (sim_cpu *cpu, int32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); - aarch64_set_mem_double (cpu, aarch64_get_reg_u64 (cpu, st, 1) + offset, - aarch64_get_FP_double (cpu, rn)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset, + aarch64_get_vec_u64 (cpu, st, 0)); } /* Store 128 bit unscaled signed 9 bit. 
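   As in fsturs/fsturd above, the base register is Rn (SP allowed) and
   the data register is St - the old code had the two swapped.  Going
   through the raw bits (aarch64_get_vec_u32/u64) instead of
   aarch64_get_FP_float/double also avoids any host float conversion,
   so NaN payloads are stored unchanged.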
*/ static void fsturq (sim_cpu *cpu, int32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); FRegister a; - aarch64_get_FP_long_double (cpu, rn, & a); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_get_FP_long_double (cpu, st, & a); aarch64_set_mem_long_double (cpu, - aarch64_get_reg_u64 (cpu, st, 1) + aarch64_get_reg_u64 (cpu, rn, 1) + offset, a); } @@ -6856,9 +7661,10 @@ fsturq (sim_cpu *cpu, int32_t offset) static void ffmovs (sim_cpu *cpu) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn)); } @@ -6866,9 +7672,10 @@ ffmovs (sim_cpu *cpu) static void ffmovd (sim_cpu *cpu) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn)); } @@ -6876,9 +7683,10 @@ ffmovd (sim_cpu *cpu) static void fgmovs (sim_cpu *cpu) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP)); } @@ -6886,9 +7694,10 @@ fgmovs (sim_cpu *cpu) static void fgmovd (sim_cpu *cpu) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP)); } @@ -6896,9 +7705,10 @@ fgmovd (sim_cpu *cpu) static void gfmovs (sim_cpu *cpu) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0)); } @@ -6906,9 +7716,10 @@ gfmovs (sim_cpu *cpu) static void gfmovd (sim_cpu *cpu) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0)); } @@ -6921,20 +7732,22 @@ gfmovd (sim_cpu *cpu) static void fmovs (sim_cpu *cpu) { - unsigned int sd = uimm (aarch64_get_instr (cpu), 4, 0); - uint32_t imm = uimm (aarch64_get_instr (cpu), 20, 13); + unsigned int sd = INSTR (4, 0); + uint32_t imm = INSTR (20, 13); float f = fp_immediate_for_encoding_32 (imm); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, f); } static void fmovd (sim_cpu *cpu) { - unsigned int sd = uimm (aarch64_get_instr (cpu), 4, 0); - uint32_t imm = uimm (aarch64_get_instr (cpu), 20, 13); + unsigned int sd = INSTR (4, 0); + uint32_t imm = INSTR (20, 13); double d = fp_immediate_for_encoding_64 (imm); + 
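  /* A sketch of the expected expansion, assuming the
     fp_immediate_for_encoding_32/64 helpers follow the architected
     VFPExpandImm rule (imm8 = abcdefgh encodes +/- (16 + efgh)/16 * 2^r
     with r in [-3,4]); for the single precision case used by fmovs:

       uint32_t a = (imm >> 7) & 1, b = (imm >> 6) & 1;
       uint32_t exp = (!b << 7) | (b ? 0x7C : 0) | ((imm >> 4) & 3);
       uint32_t bits = (a << 31) | (exp << 23) | ((imm & 0xF) << 19);

     so that, e.g., imm8 == 0x70 comes out as 1.0f.  */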
TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, d); } @@ -6948,14 +7761,14 @@ dexSimpleFPImmediate (sim_cpu *cpu) instr[12,10] == 100 instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC instr[4,0] == Rd */ - uint32_t imm5 = uimm (aarch64_get_instr (cpu), 9, 5); + uint32_t imm5 = INSTR (9, 5); NYI_assert (31, 23, 0x3C); if (imm5 != 0) HALT_UNALLOC; - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) fmovd (cpu); else fmovs (cpu); @@ -6972,33 +7785,36 @@ dexSimpleFPImmediate (sim_cpu *cpu) static void fldurs (sim_cpu *cpu, int32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); - aarch64_set_FP_float (cpu, st, aarch64_get_mem_float - (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); } /* Load 64 bit unscaled signed 9 bit. */ static void fldurd (sim_cpu *cpu, int32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); - aarch64_set_FP_double (cpu, st, aarch64_get_mem_double - (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 + (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); } /* Load 128 bit unscaled signed 9 bit. */ static void fldurq (sim_cpu *cpu, int32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int st = INSTR (4, 0); FRegister a; uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_get_mem_long_double (cpu, addr, & a); aarch64_set_FP_long_double (cpu, st, a); } @@ -7013,10 +7829,11 @@ fldurq (sim_cpu *cpu, int32_t offset) static void fabss (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); float value = aarch64_get_FP_float (cpu, sn); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, fabsf (value)); } @@ -7024,10 +7841,11 @@ fabss (sim_cpu *cpu) static void fabcpu (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); double value = aarch64_get_FP_double (cpu, sn); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, fabs (value)); } @@ -7035,9 +7853,10 @@ fabcpu (sim_cpu *cpu) static void fnegs (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn)); } @@ -7045,9 +7864,10 @@ fnegs (sim_cpu *cpu) static void fnegd (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + 
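  /* FNEG just inverts the sign bit; on an IEEE-754 host the unary
     minus below behaves identically, including for zeros, infinities
     and NaNs.  */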
TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn)); } @@ -7055,19 +7875,21 @@ fnegd (sim_cpu *cpu) static void fsqrts (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); - aarch64_set_FP_float (cpu, sd, sqrt (aarch64_get_FP_float (cpu, sn))); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn))); } /* Double square root. */ static void fsqrtd (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, sqrt (aarch64_get_FP_double (cpu, sn))); } @@ -7076,9 +7898,10 @@ fsqrtd (sim_cpu *cpu) static void fcvtds (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn)); } @@ -7086,9 +7909,10 @@ fcvtds (sim_cpu *cpu) static void fcvtcpu (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn)); } @@ -7104,9 +7928,9 @@ do_FRINT (sim_cpu *cpu) instr[4,0] = dest */ float val; - unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned int rmode = uimm (aarch64_get_instr (cpu), 17, 15); + unsigned rs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned int rmode = INSTR (17, 15); NYI_assert (31, 23, 0x03C); NYI_assert (21, 18, 0x9); @@ -7116,7 +7940,8 @@ do_FRINT (sim_cpu *cpu) /* FIXME: Add support for rmode == 6 exactness check. */ rmode = uimm (aarch64_get_FPSR (cpu), 23, 22); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { double val = aarch64_get_FP_double (cpu, rs); @@ -7216,6 +8041,57 @@ do_FRINT (sim_cpu *cpu) } } +/* Convert half to float. */ +static void +do_FCVT_half_to_single (sim_cpu *cpu) +{ + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 10, 0x7B890); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn)); +} + +/* Convert half to double. */ +static void +do_FCVT_half_to_double (sim_cpu *cpu) +{ + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 10, 0x7B8B0); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn)); +} + +static void +do_FCVT_single_to_half (sim_cpu *cpu) +{ + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 10, 0x788F0); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn)); +} + +/* Convert double to half. 
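   Note that this narrows via an intermediate float (the "(float)" cast
   below), so the value is rounded twice; a direct double-to-half
   conversion could round differently in corner cases.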
*/ +static void +do_FCVT_double_to_half (sim_cpu *cpu) +{ + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 10, 0x798F0); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn)); +} + static void dexSimpleFPDataProc1Source (sim_cpu *cpu) { @@ -7243,20 +8119,22 @@ dexSimpleFPDataProc1Source (sim_cpu *cpu) 000101 ==> FCVT (half-to-double) instr[14,10] = 10000. */ - uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) - | uimm (aarch64_get_instr (cpu), 29, 29); - uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22); - uint32_t opcode = uimm (aarch64_get_instr (cpu), 20, 15); + uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); + uint32_t type = INSTR (23, 22); + uint32_t opcode = INSTR (20, 15); if (M_S != 0) HALT_UNALLOC; if (type == 3) { - if (opcode == 4 || opcode == 5) - HALT_NYI; + if (opcode == 4) + do_FCVT_half_to_single (cpu); + else if (opcode == 5) + do_FCVT_half_to_double (cpu); else HALT_UNALLOC; + return; } if (type == 2) @@ -7315,7 +8193,13 @@ dexSimpleFPDataProc1Source (sim_cpu *cpu) do_FRINT (cpu); return; - case 7: /* FCVT double/single to half precision. */ + case 7: + if (INSTR (22, 22)) + do_FCVT_double_to_half (cpu); + else + do_FCVT_single_to_half (cpu); + return; + case 13: HALT_NYI; @@ -7328,9 +8212,10 @@ dexSimpleFPDataProc1Source (sim_cpu *cpu) static void scvtf32 (sim_cpu *cpu) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP)); } @@ -7339,9 +8224,10 @@ scvtf32 (sim_cpu *cpu) static void scvtf (sim_cpu *cpu) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_float (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP)); } @@ -7350,9 +8236,10 @@ scvtf (sim_cpu *cpu) static void scvtd32 (sim_cpu *cpu) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP)); } @@ -7361,9 +8248,10 @@ scvtd32 (sim_cpu *cpu) static void scvtd (sim_cpu *cpu) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned sd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned sd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_FP_double (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP)); } @@ -7377,6 +8265,17 @@ static const float FLOAT_LONG_MIN = (float) LONG_MIN; static const double DOUBLE_LONG_MAX = (double) LONG_MAX; static const double DOUBLE_LONG_MIN = (double) LONG_MIN; +#define UINT_MIN 0 +#define ULONG_MIN 0 +static const float FLOAT_UINT_MAX = (float) UINT_MAX; +static const float FLOAT_UINT_MIN = (float) UINT_MIN; +static const double DOUBLE_UINT_MAX = (double) UINT_MAX; +static const double DOUBLE_UINT_MIN = (double) UINT_MIN; +static const float FLOAT_ULONG_MAX = (float) ULONG_MAX; +static const float FLOAT_ULONG_MIN = (float) ULONG_MIN; +static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX; +static const double 
DOUBLE_ULONG_MIN = (double) ULONG_MIN; + /* Check for FP exception conditions: NaN raises IO Infinity raises IO @@ -7426,14 +8325,15 @@ static const double DOUBLE_LONG_MIN = (double) LONG_MIN; static void fcvtszs32 (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); /* TODO : check that this rounds toward zero. */ float f = aarch64_get_FP_float (cpu, sn); int32_t value = (int32_t) f; RAISE_EXCEPTIONS (f, value, FLOAT, INT); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* Avoid sign extension to 64 bit. */ aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); } @@ -7442,13 +8342,14 @@ fcvtszs32 (sim_cpu *cpu) static void fcvtszs (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); float f = aarch64_get_FP_float (cpu, sn); int64_t value = (int64_t) f; RAISE_EXCEPTIONS (f, value, FLOAT, LONG); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_s64 (cpu, rd, NO_SP, value); } @@ -7456,14 +8357,15 @@ fcvtszs (sim_cpu *cpu) static void fcvtszd32 (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); /* TODO : check that this rounds toward zero. */ double d = aarch64_get_FP_double (cpu, sn); int32_t value = (int32_t) d; RAISE_EXCEPTIONS (d, value, DOUBLE, INT); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); /* Avoid sign extension to 64 bit. */ aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); } @@ -7472,8 +8374,8 @@ fcvtszd32 (sim_cpu *cpu) static void fcvtszd (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned sn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); /* TODO : check that this rounds toward zero. */ double d = aarch64_get_FP_double (cpu, sn); int64_t value; @@ -7482,6 +8384,7 @@ fcvtszd (sim_cpu *cpu) RAISE_EXCEPTIONS (d, value, DOUBLE, LONG); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_s64 (cpu, rd, NO_SP, value); } @@ -7497,27 +8400,28 @@ do_fcvtzu (sim_cpu *cpu) instr[9,5] = Rs instr[4,0] = Rd. */ - unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); NYI_assert (30, 23, 0x3C); NYI_assert (20, 16, 0x19); - if (uimm (aarch64_get_instr (cpu), 21, 21) != 1) + if (INSTR (21, 21) != 1) /* Convert to fixed point. */ HALT_NYI; - if (uimm (aarch64_get_instr (cpu), 31, 31)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (31, 31)) { /* Convert to unsigned 64-bit integer. */ - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) { double d = aarch64_get_FP_double (cpu, rs); uint64_t value = (uint64_t) d; /* Do not raise an exception if we have reached ULONG_MAX. */ if (value != (1UL << 63)) - RAISE_EXCEPTIONS (d, value, DOUBLE, LONG); + RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG); aarch64_set_reg_u64 (cpu, rd, NO_SP, value); } @@ -7528,7 +8432,7 @@ do_fcvtzu (sim_cpu *cpu) /* Do not raise an exception if we have reached ULONG_MAX. 
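   (Strictly, converting an out-of-range float to an unsigned integer
   is undefined behaviour in C; the 1UL << 63 sentinel tested below is
   simply what common x86-64 hosts produce for such conversions, so
   this check is host-specific rather than architectural.)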
*/ if (value != (1UL << 63)) - RAISE_EXCEPTIONS (f, value, FLOAT, LONG); + RAISE_EXCEPTIONS (f, value, FLOAT, ULONG); aarch64_set_reg_u64 (cpu, rd, NO_SP, value); } @@ -7538,14 +8442,14 @@ do_fcvtzu (sim_cpu *cpu) uint32_t value; /* Convert to unsigned 32-bit integer. */ - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) { double d = aarch64_get_FP_double (cpu, rs); value = (uint32_t) d; /* Do not raise an exception if we have reached UINT_MAX. */ if (value != (1UL << 31)) - RAISE_EXCEPTIONS (d, value, DOUBLE, INT); + RAISE_EXCEPTIONS (d, value, DOUBLE, UINT); } else { @@ -7554,7 +8458,7 @@ do_fcvtzu (sim_cpu *cpu) value = (uint32_t) f; /* Do not raise an exception if we have reached UINT_MAX. */ if (value != (1UL << 31)) - RAISE_EXCEPTIONS (f, value, FLOAT, INT); + RAISE_EXCEPTIONS (f, value, FLOAT, UINT); } aarch64_set_reg_u64 (cpu, rd, NO_SP, value); @@ -7573,21 +8477,22 @@ do_UCVTF (sim_cpu *cpu) instr[9,5] = Rs instr[4,0] = Rd. */ - unsigned rs = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rs = INSTR (9, 5); + unsigned rd = INSTR (4, 0); NYI_assert (30, 23, 0x3C); NYI_assert (20, 16, 0x03); - if (uimm (aarch64_get_instr (cpu), 21, 21) != 1) + if (INSTR (21, 21) != 1) HALT_NYI; /* FIXME: Add exception raising. */ - if (uimm (aarch64_get_instr (cpu), 31, 31)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (31, 31)) { uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) aarch64_set_FP_double (cpu, rd, (double) value); else aarch64_set_FP_float (cpu, rd, (float) value); @@ -7596,7 +8501,7 @@ do_UCVTF (sim_cpu *cpu) { uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + if (INSTR (22, 22)) aarch64_set_FP_double (cpu, rd, (double) value); else aarch64_set_FP_float (cpu, rd, (float) value); @@ -7612,15 +8517,16 @@ float_vector_move (sim_cpu *cpu) instr[9,5] ==> source instr[4,0] ==> dest. */ - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); NYI_assert (31, 17, 0x4F57); - if (uimm (aarch64_get_instr (cpu), 15, 10) != 0) + if (INSTR (15, 10) != 0) HALT_UNALLOC; - if (uimm (aarch64_get_instr (cpu), 16, 16)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (16, 16)) aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP)); else aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1)); @@ -7646,22 +8552,22 @@ dexSimpleFPIntegerConvert (sim_cpu *cpu) uint32_t size; uint32_t S; - if (uimm (aarch64_get_instr (cpu), 31, 17) == 0x4F57) + if (INSTR (31, 17) == 0x4F57) { float_vector_move (cpu); return; } - size = uimm (aarch64_get_instr (cpu), 31, 31); - S = uimm (aarch64_get_instr (cpu), 29, 29); + size = INSTR (31, 31); + S = INSTR (29, 29); if (S != 0) HALT_UNALLOC; - type = uimm (aarch64_get_instr (cpu), 23, 22); + type = INSTR (23, 22); if (type > 1) HALT_UNALLOC; - rmode_opcode = uimm (aarch64_get_instr (cpu), 20, 16); + rmode_opcode = INSTR (20, 16); size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */ switch (rmode_opcode) @@ -7673,8 +8579,6 @@ dexSimpleFPIntegerConvert (sim_cpu *cpu) case 1: scvtd32 (cpu); return; case 2: scvtf (cpu); return; case 3: scvtd (cpu); return; - default: - HALT_UNREACHABLE; } case 6: /* FMOV GR, Vec. 
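   These FMOV forms copy raw bits between a general register and
   element 0 of a vector register with no value conversion; see
   fgmovs/fgmovd and gfmovs/gfmovd above.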
*/ @@ -7700,7 +8604,6 @@ dexSimpleFPIntegerConvert (sim_cpu *cpu) case 1: fcvtszd32 (cpu); return; case 2: fcvtszs (cpu); return; case 3: fcvtszd (cpu); return; - default: HALT_UNREACHABLE; } case 25: do_fcvtzu (cpu); return; @@ -7724,8 +8627,22 @@ set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2) { uint32_t flags; + /* FIXME: Add exception raising. */ if (isnan (fvalue1) || isnan (fvalue2)) flags = C|V; + else if (isinf (fvalue1) && isinf (fvalue2)) + { + /* Subtracting two infinities may give a NaN. We only need to compare + the signs, which we can get from isinf. */ + int result = isinf (fvalue1) - isinf (fvalue2); + + if (result == 0) + flags = Z|C; + else if (result < 0) + flags = N; + else /* (result > 0). */ + flags = C; + } else { float result = fvalue1 - fvalue2; @@ -7744,12 +8661,13 @@ set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2) static void fcmps (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); float fvalue1 = aarch64_get_FP_float (cpu, sn); float fvalue2 = aarch64_get_FP_float (cpu, sm); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_float_compare (cpu, fvalue1, fvalue2); } @@ -7758,9 +8676,10 @@ fcmps (sim_cpu *cpu) static void fcmpzs (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sn = INSTR ( 9, 5); float fvalue1 = aarch64_get_FP_float (cpu, sn); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_float_compare (cpu, fvalue1, 0.0f); } @@ -7768,12 +8687,13 @@ fcmpzs (sim_cpu *cpu) static void fcmpes (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); float fvalue1 = aarch64_get_FP_float (cpu, sn); float fvalue2 = aarch64_get_FP_float (cpu, sm); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_float_compare (cpu, fvalue1, fvalue2); } @@ -7781,9 +8701,10 @@ fcmpes (sim_cpu *cpu) static void fcmpzes (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sn = INSTR ( 9, 5); float fvalue1 = aarch64_get_FP_float (cpu, sn); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_float_compare (cpu, fvalue1, 0.0f); } @@ -7792,8 +8713,22 @@ set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2) { uint32_t flags; + /* FIXME: Add exception raising. */ if (isnan (dval1) || isnan (dval2)) flags = C|V; + else if (isinf (dval1) && isinf (dval2)) + { + /* Subtracting two infinities may give a NaN. We only need to compare + the signs, which we can get from isinf. */ + int result = isinf (dval1) - isinf (dval2); + + if (result == 0) + flags = Z|C; + else if (result < 0) + flags = N; + else /* (result > 0). 
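             Portability note: the isinf subtraction above relies on
             the glibc behaviour of returning -1 for -Inf and +1 for
             +Inf; C99 only guarantees a nonzero result.  A portable
             variant could compare sign bits instead:

               int s1 = signbit (dval1) ? -1 : 1;
               int s2 = signbit (dval2) ? -1 : 1;
               int result = s1 - s2;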
*/ + flags = C; + } else { double result = dval1 - dval2; @@ -7813,12 +8748,13 @@ set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2) static void fcmpd (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); double dvalue1 = aarch64_get_FP_double (cpu, sn); double dvalue2 = aarch64_get_FP_double (cpu, sm); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_double_compare (cpu, dvalue1, dvalue2); } @@ -7827,9 +8763,10 @@ fcmpd (sim_cpu *cpu) static void fcmpzd (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sn = INSTR ( 9, 5); double dvalue1 = aarch64_get_FP_double (cpu, sn); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_double_compare (cpu, dvalue1, 0.0); } @@ -7837,12 +8774,13 @@ fcmpzd (sim_cpu *cpu) static void fcmped (sim_cpu *cpu) { - unsigned sm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sm = INSTR (20, 16); + unsigned sn = INSTR ( 9, 5); double dvalue1 = aarch64_get_FP_double (cpu, sn); double dvalue2 = aarch64_get_FP_double (cpu, sm); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_double_compare (cpu, dvalue1, dvalue2); } @@ -7850,9 +8788,10 @@ fcmped (sim_cpu *cpu) static void fcmpzed (sim_cpu *cpu) { - unsigned sn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned sn = INSTR ( 9, 5); double dvalue1 = aarch64_get_FP_double (cpu, sn); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); set_flags_for_double_compare (cpu, dvalue1, 0.0); } @@ -7869,11 +8808,10 @@ dexSimpleFPCompare (sim_cpu *cpu) 01000 ==> FCMPZ, 11000 ==> FCMPEZ, ow ==> UNALLOC */ uint32_t dispatch; - uint32_t M_S = (uimm (aarch64_get_instr (cpu), 31, 31) << 1) - | uimm (aarch64_get_instr (cpu), 29, 29); - uint32_t type = uimm (aarch64_get_instr (cpu), 23, 22); - uint32_t op = uimm (aarch64_get_instr (cpu), 15, 14); - uint32_t op2_2_0 = uimm (aarch64_get_instr (cpu), 2, 0); + uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); + uint32_t type = INSTR (23, 22); + uint32_t op = INSTR (15, 14); + uint32_t op2_2_0 = INSTR (2, 0); if (op2_2_0 != 0) HALT_UNALLOC; @@ -7888,7 +8826,7 @@ dexSimpleFPCompare (sim_cpu *cpu) HALT_UNALLOC; /* dispatch on type and top 2 bits of opcode. */ - dispatch = (type << 2) | uimm (aarch64_get_instr (cpu), 4, 3); + dispatch = (type << 2) | INSTR (4, 3); switch (dispatch) { @@ -7900,26 +8838,26 @@ dexSimpleFPCompare (sim_cpu *cpu) case 5: fcmpzd (cpu); return; case 6: fcmped (cpu); return; case 7: fcmpzed (cpu); return; - default: HALT_UNREACHABLE; } } static void do_scalar_FADDP (sim_cpu *cpu) { - /* instr [31,23] = 011111100 + /* instr [31,23] = 0111 1110 0 instr [22] = single(0)/double(1) - instr [21,10] = 1100 0011 0110 + instr [21,10] = 11 0000 1101 10 instr [9,5] = Fn instr [4,0] = Fd. */ - unsigned Fn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned Fd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned Fn = INSTR (9, 5); + unsigned Fd = INSTR (4, 0); NYI_assert (31, 23, 0x0FC); NYI_assert (21, 10, 0xC36); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { double val1 = aarch64_get_vec_double (cpu, Fn, 0); double val2 = aarch64_get_vec_double (cpu, Fn, 1); @@ -7948,15 +8886,16 @@ do_scalar_FABD (sim_cpu *cpu) instr [9, 5] = Rn instr [4, 0] = Rd. 
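     FABD is the absolute difference |Fn - Fm| in the selected
     precision; the body below computes it with a host subtraction
     followed by fabs/fabsf.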
*/ - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); NYI_assert (31, 23, 0x0FD); NYI_assert (21, 21, 1); NYI_assert (15, 10, 0x35); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) aarch64_set_FP_double (cpu, rd, fabs (aarch64_get_FP_double (cpu, rn) - aarch64_get_FP_double (cpu, rm))); @@ -7975,13 +8914,14 @@ do_scalar_CMGT (sim_cpu *cpu) instr [9, 5] = Rn instr [4, 0] = Rd. */ - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); NYI_assert (31, 21, 0x2F7); NYI_assert (15, 10, 0x0D); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) > aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L); @@ -7996,38 +8936,219 @@ do_scalar_USHR (sim_cpu *cpu) instr [9, 5] = Rn instr [4, 0] = Rd. */ - unsigned amount = 128 - uimm (aarch64_get_instr (cpu), 22, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned amount = 128 - INSTR (22, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); NYI_assert (31, 23, 0x0FE); NYI_assert (15, 10, 0x01); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> amount); } static void -do_scalar_SHL (sim_cpu *cpu) +do_scalar_SSHL (sim_cpu *cpu) +{ + /* instr [31,21] = 0101 1110 111 + instr [20,16] = Rm + instr [15,10] = 0100 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); + + NYI_assert (31, 21, 0x2F7); + NYI_assert (15, 10, 0x11); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (shift >= 0) + aarch64_set_vec_s64 (cpu, rd, 0, + aarch64_get_vec_s64 (cpu, rn, 0) << shift); + else + aarch64_set_vec_s64 (cpu, rd, 0, + aarch64_get_vec_s64 (cpu, rn, 0) >> - shift); +} + +/* Floating point scalar compare greater than or equal to 0. */ +static void +do_scalar_FCMGE_zero (sim_cpu *cpu) +{ + /* instr [31,23] = 0111 1110 1 + instr [22,22] = size + instr [21,16] = 1000 00 + instr [15,10] = 1100 10 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned size = INSTR (22, 22); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 23, 0x0FD); + NYI_assert (21, 16, 0x20); + NYI_assert (15, 10, 0x32); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (size) + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0); + else + aarch64_set_vec_u32 (cpu, rd, 0, + aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0); +} + +/* Floating point scalar compare less than or equal to 0. */ +static void +do_scalar_FCMLE_zero (sim_cpu *cpu) +{ + /* instr [31,23] = 0111 1110 1 + instr [22,22] = size + instr [21,16] = 1000 00 + instr [15,10] = 1101 10 + instr [9, 5] = Rn + instr [4, 0] = Rd. 
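     As with the other zero-compare helpers here, a true comparison
     writes an all-ones mask (-1) to element 0 of Rd and a false one
     writes zero.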
*/ + + unsigned size = INSTR (22, 22); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 23, 0x0FD); + NYI_assert (21, 16, 0x20); + NYI_assert (15, 10, 0x36); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (size) + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0); + else + aarch64_set_vec_u32 (cpu, rd, 0, + aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0); +} + +/* Floating point scalar compare greater than 0. */ +static void +do_scalar_FCMGT_zero (sim_cpu *cpu) +{ + /* instr [31,23] = 0101 1110 1 + instr [22,22] = size + instr [21,16] = 1000 00 + instr [15,10] = 1100 10 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned size = INSTR (22, 22); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 23, 0x0BD); + NYI_assert (21, 16, 0x20); + NYI_assert (15, 10, 0x32); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (size) + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0); + else + aarch64_set_vec_u32 (cpu, rd, 0, + aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0); +} + +/* Floating point scalar compare equal to 0. */ +static void +do_scalar_FCMEQ_zero (sim_cpu *cpu) { - /* instr [31,23] = 0111 1101 0 + /* instr [31,23] = 0101 1110 1 + instr [22,22] = size + instr [21,16] = 1000 00 + instr [15,10] = 1101 10 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned size = INSTR (22, 22); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 23, 0x0BD); + NYI_assert (21, 16, 0x20); + NYI_assert (15, 10, 0x36); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (size) + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0); + else + aarch64_set_vec_u32 (cpu, rd, 0, + aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0); +} + +/* Floating point scalar compare less than 0. */ +static void +do_scalar_FCMLT_zero (sim_cpu *cpu) +{ + /* instr [31,23] = 0101 1110 1 + instr [22,22] = size + instr [21,16] = 1000 00 + instr [15,10] = 1110 10 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned size = INSTR (22, 22); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 23, 0x0BD); + NYI_assert (21, 16, 0x20); + NYI_assert (15, 10, 0x3A); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (size) + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0); + else + aarch64_set_vec_u32 (cpu, rd, 0, + aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0); +} + +static void +do_scalar_shift (sim_cpu *cpu) +{ + /* instr [31,23] = 0101 1111 0 instr [22,16] = shift amount - instr [15,10] = 0101 01 + instr [15,10] = 0101 01 [SHL] + instr [15,10] = 0000 01 [SSHR] instr [9, 5] = Rn instr [4, 0] = Rd. 
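     For these 64-bit scalar shifts instr[22] must be set (otherwise
     UNALLOC); a right shift amount decodes as 128 - instr[22,16] and a
     left shift amount as instr[22,16] - 64, so e.g. instr[22,16] == 0x41
     means SHL #1 or SSHR #63.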
*/ - unsigned amount = uimm (aarch64_get_instr (cpu), 22, 16) - 64; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned amount; NYI_assert (31, 23, 0x0BE); - NYI_assert (15, 10, 0x15); - if (uimm (aarch64_get_instr (cpu), 22, 22) == 0) + if (INSTR (22, 22) == 0) HALT_UNALLOC; - aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0) << amount); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + switch (INSTR (15, 10)) + { + case 0x01: /* SSHR */ + amount = 128 - INSTR (22, 16); + aarch64_set_vec_s64 (cpu, rd, 0, + aarch64_get_vec_s64 (cpu, rn, 0) >> amount); + return; + case 0x15: /* SHL */ + amount = INSTR (22, 16) - 64; + aarch64_set_vec_u64 (cpu, rd, 0, + aarch64_get_vec_u64 (cpu, rn, 0) << amount); + return; + default: + HALT_NYI; + } } /* FCMEQ FCMGT FCMGE. */ @@ -8047,12 +9168,10 @@ do_scalar_FCM (sim_cpu *cpu) instr [9, 5] = Rn instr [4, 0] = Rd. */ - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned EUac = (uimm (aarch64_get_instr (cpu), 23, 23) << 2) - | (uimm (aarch64_get_instr (cpu), 29, 29) << 1) - | uimm (aarch64_get_instr (cpu), 11, 11); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11); unsigned result; float val1; float val2; @@ -8063,7 +9182,8 @@ do_scalar_FCM (sim_cpu *cpu) NYI_assert (15, 12, 0xE); NYI_assert (10, 10, 1); - if (uimm (aarch64_get_instr (cpu), 22, 22)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { double val1 = aarch64_get_FP_double (cpu, rn); double val2 = aarch64_get_FP_double (cpu, rm); @@ -8127,72 +9247,166 @@ do_scalar_FCM (sim_cpu *cpu) HALT_UNALLOC; } - aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); + aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); +} + +/* An alias of DUP. */ +static void +do_scalar_MOV (sim_cpu *cpu) +{ + /* instr [31,21] = 0101 1110 000 + instr [20,16] = imm5 + instr [15,10] = 0000 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + unsigned index; + + NYI_assert (31, 21, 0x2F0); + NYI_assert (15, 10, 0x01); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (16, 16)) + { + /* 8-bit. */ + index = INSTR (20, 17); + aarch64_set_vec_u8 + (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index)); + } + else if (INSTR (17, 17)) + { + /* 16-bit. */ + index = INSTR (20, 18); + aarch64_set_vec_u16 + (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index)); + } + else if (INSTR (18, 18)) + { + /* 32-bit. */ + index = INSTR (20, 19); + aarch64_set_vec_u32 + (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index)); + } + else if (INSTR (19, 19)) + { + /* 64-bit. */ + index = INSTR (20, 20); + aarch64_set_vec_u64 + (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index)); + } + else + HALT_UNALLOC; +} + +static void +do_scalar_NEG (sim_cpu *cpu) +{ + /* instr [31,10] = 0111 1110 1110 0000 1011 10 + instr [9, 5] = Rn + instr [4, 0] = Rd. 
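     The lowest set bit of imm5 picks the element size and the bits
     above it the element index: imm5<0> ==> byte, index = imm5<4:1>;
     imm5<1> ==> half, index = imm5<4:2>; imm5<2> ==> word, index =
     imm5<4:3>; imm5<3> ==> doubleword, index = imm5<4>.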
*/ + + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + + NYI_assert (31, 10, 0x1FB82E); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0)); +} + +static void +do_scalar_USHL (sim_cpu *cpu) +{ + /* instr [31,21] = 0111 1110 111 + instr [20,16] = Rm + instr [15,10] = 0100 01 + instr [9, 5] = Rn + instr [4, 0] = Rd. */ + + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); + + NYI_assert (31, 21, 0x3F7); + NYI_assert (15, 10, 0x11); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (shift >= 0) + aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift); + else + aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift); +} + +static void +do_double_add (sim_cpu *cpu) +{ + /* instr [31,21] = 0101 1110 111 + instr [20,16] = Fn + instr [15,10] = 1000 01 + instr [9,5] = Fm + instr [4,0] = Fd. */ + unsigned Fd; + unsigned Fm; + unsigned Fn; + double val1; + double val2; + + NYI_assert (31, 21, 0x2F7); + NYI_assert (15, 10, 0x21); + + Fd = INSTR (4, 0); + Fm = INSTR (9, 5); + Fn = INSTR (20, 16); + + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + val1 = aarch64_get_FP_double (cpu, Fm); + val2 = aarch64_get_FP_double (cpu, Fn); + + aarch64_set_FP_double (cpu, Fd, val1 + val2); } -/* An alias of DUP. */ static void -do_scalar_MOV (sim_cpu *cpu) +do_scalar_UCVTF (sim_cpu *cpu) { - /* instr [31,21] = 0101 1110 000 - instr [20,16] = imm5 - instr [15,10] = 0000 01 - instr [9, 5] = Rn - instr [4, 0] = Rd. */ + /* instr [31,23] = 0111 1110 0 + instr [22] = single(0)/double(1) + instr [21,10] = 10 0001 1101 10 + instr [9,5] = rn + instr [4,0] = rd. */ - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned index; + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); - NYI_assert (31, 21, 0x2F0); - NYI_assert (15, 10, 0x01); + NYI_assert (31, 23, 0x0FC); + NYI_assert (21, 10, 0x876); - if (uimm (aarch64_get_instr (cpu), 16, 16)) - { - /* 8-bit. */ - index = uimm (aarch64_get_instr (cpu), 20, 17); - aarch64_set_vec_u8 - (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index)); - } - else if (uimm (aarch64_get_instr (cpu), 17, 17)) - { - /* 16-bit. */ - index = uimm (aarch64_get_instr (cpu), 20, 18); - aarch64_set_vec_u16 - (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index)); - } - else if (uimm (aarch64_get_instr (cpu), 18, 18)) + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + if (INSTR (22, 22)) { - /* 32-bit. */ - index = uimm (aarch64_get_instr (cpu), 20, 19); - aarch64_set_vec_u32 - (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index)); + uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0); + + aarch64_set_vec_double (cpu, rd, 0, (double) val); } - else if (uimm (aarch64_get_instr (cpu), 19, 19)) + else { - /* 64-bit. */ - index = uimm (aarch64_get_instr (cpu), 20, 20); - aarch64_set_vec_u64 - (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index)); + uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0); + + aarch64_set_vec_float (cpu, rd, 0, (float) val); } - else - HALT_UNALLOC; } static void -do_double_add (sim_cpu *cpu) +do_scalar_vec (sim_cpu *cpu) { + /* instr [30] = 1. */ /* instr [28,25] = 1111. 
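     Dispatch is on instr[31,23] first and then instr[15,10], with a
     further look at instr[21,16] where one slot (0xFC/0x36) is shared
     by FADDP and UCVTF.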
*/ - unsigned Fd; - unsigned Fm; - unsigned Fn; - double val1; - double val2; - - switch (uimm (aarch64_get_instr (cpu), 31, 23)) + switch (INSTR (31, 23)) { case 0xBC: - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + switch (INSTR (15, 10)) { case 0x01: do_scalar_MOV (cpu); return; case 0x39: do_scalar_FCM (cpu); return; @@ -8200,22 +9414,32 @@ do_double_add (sim_cpu *cpu) } break; - case 0xBE: do_scalar_SHL (cpu); return; + case 0xBE: do_scalar_shift (cpu); return; case 0xFC: - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + switch (INSTR (15, 10)) { - case 0x36: do_scalar_FADDP (cpu); return; + case 0x36: + switch (INSTR (21, 16)) + { + case 0x30: do_scalar_FADDP (cpu); return; + case 0x21: do_scalar_UCVTF (cpu); return; + } + HALT_NYI; case 0x39: do_scalar_FCM (cpu); return; case 0x3B: do_scalar_FCM (cpu); return; } break; case 0xFD: - switch (uimm (aarch64_get_instr (cpu), 15, 10)) + switch (INSTR (15, 10)) { case 0x0D: do_scalar_CMGT (cpu); return; + case 0x11: do_scalar_USHL (cpu); return; + case 0x2E: do_scalar_NEG (cpu); return; + case 0x32: do_scalar_FCMGE_zero (cpu); return; case 0x35: do_scalar_FABD (cpu); return; + case 0x36: do_scalar_FCMLE_zero (cpu); return; case 0x39: do_scalar_FCM (cpu); return; case 0x3B: do_scalar_FCM (cpu); return; default: @@ -8223,27 +9447,22 @@ do_double_add (sim_cpu *cpu) } case 0xFE: do_scalar_USHR (cpu); return; - default: - break; - } - - /* instr [31,21] = 0101 1110 111 - instr [20,16] = Fn - instr [15,10] = 1000 01 - instr [9,5] = Fm - instr [4,0] = Fd. */ - if (uimm (aarch64_get_instr (cpu), 31, 21) != 0x2F7 - || uimm (aarch64_get_instr (cpu), 15, 10) != 0x21) - HALT_NYI; - Fd = uimm (aarch64_get_instr (cpu), 4, 0); - Fm = uimm (aarch64_get_instr (cpu), 9, 5); - Fn = uimm (aarch64_get_instr (cpu), 20, 16); - - val1 = aarch64_get_FP_double (cpu, Fm); - val2 = aarch64_get_FP_double (cpu, Fn); + case 0xBD: + switch (INSTR (15, 10)) + { + case 0x21: do_double_add (cpu); return; + case 0x11: do_scalar_SSHL (cpu); return; + case 0x32: do_scalar_FCMGT_zero (cpu); return; + case 0x36: do_scalar_FCMEQ_zero (cpu); return; + case 0x3A: do_scalar_FCMLT_zero (cpu); return; + default: + HALT_NYI; + } - aarch64_set_FP_double (cpu, Fd, val1 + val2); + default: + HALT_NYI; + } } static void @@ -8251,25 +9470,25 @@ dexAdvSIMD1 (sim_cpu *cpu) { /* instr [28,25] = 1 111. */ - /* we are currently only interested in the basic + /* We are currently only interested in the basic scalar fp routines which all have bit 30 = 0. */ - if (uimm (aarch64_get_instr (cpu), 30, 30)) - do_double_add (cpu); + if (INSTR (30, 30)) + do_scalar_vec (cpu); /* instr[24] is set for FP data processing 3-source and clear for all other basic scalar fp instruction groups. */ - else if (uimm (aarch64_get_instr (cpu), 24, 24)) + else if (INSTR (24, 24)) dexSimpleFPDataProc3Source (cpu); /* instr[21] is clear for floating <-> fixed conversions and set for all other basic scalar fp instruction groups. */ - else if (!uimm (aarch64_get_instr (cpu), 21, 21)) + else if (!INSTR (21, 21)) dexSimpleFPFixedConvert (cpu); /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source 11 ==> cond select, 00 ==> other. */ else - switch (uimm (aarch64_get_instr (cpu), 11, 10)) + switch (INSTR (11, 10)) { case 1: dexSimpleFPCondCompare (cpu); return; case 2: dexSimpleFPDataProc2Source (cpu); return; @@ -8277,20 +9496,20 @@ dexAdvSIMD1 (sim_cpu *cpu) default: /* Now an ordered cascade of tests. - FP immediate has aarch64_get_instr (cpu)[12] == 1. - FP compare has aarch64_get_instr (cpu)[13] == 1. 
- FP Data Proc 1 Source has aarch64_get_instr (cpu)[14] == 1. - FP floating <--> integer conversions has aarch64_get_instr (cpu)[15] == 0. */ - if (uimm (aarch64_get_instr (cpu), 12, 12)) + FP immediate has instr [12] == 1. + FP compare has instr [13] == 1. + FP Data Proc 1 Source has instr [14] == 1. + FP floating <--> integer conversions has instr [15] == 0. */ + if (INSTR (12, 12)) dexSimpleFPImmediate (cpu); - else if (uimm (aarch64_get_instr (cpu), 13, 13)) + else if (INSTR (13, 13)) dexSimpleFPCompare (cpu); - else if (uimm (aarch64_get_instr (cpu), 14, 14)) + else if (INSTR (14, 14)) dexSimpleFPDataProc1Source (cpu); - else if (!uimm (aarch64_get_instr (cpu), 15, 15)) + else if (!INSTR (15, 15)) dexSimpleFPIntegerConvert (cpu); else @@ -8308,14 +9527,14 @@ pcadr (sim_cpu *cpu) instr[30,29] = immlo instr[23,5] = immhi. */ uint64_t address; - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); - uint32_t isPage = uimm (aarch64_get_instr (cpu), 31, 31); + unsigned rd = INSTR (4, 0); + uint32_t isPage = INSTR (31, 31); union { int64_t u64; uint64_t s64; } imm; uint64_t offset; imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5); offset = imm.u64; - offset = (offset << 2) | uimm (aarch64_get_instr (cpu), 30, 29); + offset = (offset << 2) | INSTR (30, 29); address = aarch64_get_PC (cpu); @@ -8325,6 +9544,7 @@ pcadr (sim_cpu *cpu) address &= ~0xfff; } + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset); } @@ -8351,9 +9571,10 @@ dexPCRelAddressing (sim_cpu *cpu) static void and32 (sim_cpu *cpu, uint32_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm); } @@ -8362,9 +9583,10 @@ and32 (sim_cpu *cpu, uint32_t bimm) static void and64 (sim_cpu *cpu, uint64_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm); } @@ -8373,12 +9595,13 @@ and64 (sim_cpu *cpu, uint64_t bimm) static void ands32 (sim_cpu *cpu, uint32_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = bimm; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); set_flags_for_binop32 (cpu, value1 & value2); } @@ -8387,12 +9610,13 @@ ands32 (sim_cpu *cpu, uint32_t bimm) static void ands64 (sim_cpu *cpu, uint64_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = bimm; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); set_flags_for_binop64 (cpu, value1 & value2); } @@ -8401,9 +9625,10 @@ ands64 (sim_cpu *cpu, uint64_t bimm) static void eor32 (sim_cpu *cpu, uint32_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm 
(aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm); } @@ -8412,9 +9637,10 @@ eor32 (sim_cpu *cpu, uint32_t bimm) static void eor64 (sim_cpu *cpu, uint64_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm); } @@ -8423,9 +9649,10 @@ eor64 (sim_cpu *cpu, uint64_t bimm) static void orr32 (sim_cpu *cpu, uint32_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm); } @@ -8434,9 +9661,10 @@ orr32 (sim_cpu *cpu, uint32_t bimm) static void orr64 (sim_cpu *cpu, uint64_t bimm) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, SP_OK, aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm); } @@ -8450,10 +9678,11 @@ orr64 (sim_cpu *cpu, uint64_t bimm) static void and32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); @@ -8463,10 +9692,11 @@ and32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void and64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); @@ -8476,14 +9706,15 @@ and64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); set_flags_for_binop32 (cpu, value1 & value2); } @@ -8492,14 +9723,15 @@ ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm 
(aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); set_flags_for_binop64 (cpu, value1 & value2); } @@ -8508,10 +9740,11 @@ ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); @@ -8521,10 +9754,11 @@ bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); @@ -8534,14 +9768,15 @@ bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); set_flags_for_binop32 (cpu, value1 & value2); } @@ -8550,14 +9785,15 @@ bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); set_flags_for_binop64 (cpu, value1 & value2); } @@ -8566,10 +9802,11 @@ bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 
0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); @@ -8579,10 +9816,11 @@ eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); @@ -8592,10 +9830,11 @@ eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); @@ -8605,10 +9844,11 @@ eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); @@ -8618,10 +9858,11 @@ eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); @@ -8631,10 +9872,11 @@ orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); @@ -8644,10 +9886,11 @@ orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 
16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); @@ -8657,10 +9900,11 @@ orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count) static void orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); @@ -8679,13 +9923,13 @@ dexLogicalImmediate (sim_cpu *cpu) instr[4,0] = Rd */ /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ - uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); - uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22); - /* uint32_t immr = uimm (aarch64_get_instr (cpu), 21, 16);. */ - /* uint32_t imms = uimm (aarch64_get_instr (cpu), 15, 10);. */ - uint32_t index = uimm (aarch64_get_instr (cpu), 22, 10); + uint32_t size = INSTR (31, 31); + uint32_t N = INSTR (22, 22); + /* uint32_t immr = INSTR (21, 16);. */ + /* uint32_t imms = INSTR (15, 10);. */ + uint32_t index = INSTR (22, 10); uint64_t bimm64 = LITable [index]; - uint32_t dispatch = uimm (aarch64_get_instr (cpu), 30, 29); + uint32_t dispatch = INSTR (30, 29); if (~size & N) HALT_UNALLOC; @@ -8730,8 +9974,9 @@ dexLogicalImmediate (sim_cpu *cpu) static void movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16)); } @@ -8739,8 +9984,9 @@ movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos) static void movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16)); } @@ -8748,8 +9994,9 @@ movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos) static void movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU)); } @@ -8757,8 +10004,9 @@ movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos) static void movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16)) ^ 0xffffffffffffffffULL)); @@ -8768,11 +10016,12 @@ movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos) static void movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP); uint32_t value = val << (pos * 16); uint32_t mask = ~(0xffffU << (pos * 16)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); } @@ -8780,11 
+10029,12 @@ movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos) static void movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos) { - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rd = INSTR (4, 0); uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP); uint64_t value = (uint64_t) val << (pos * 16); uint64_t mask = ~(0xffffULL << (pos * 16)); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); } @@ -8802,9 +10052,9 @@ dexMoveWideImmediate (sim_cpu *cpu) we just pass the multiplier. */ uint32_t imm; - uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); - uint32_t op = uimm (aarch64_get_instr (cpu), 30, 29); - uint32_t shift = uimm (aarch64_get_instr (cpu), 22, 21); + uint32_t size = INSTR (31, 31); + uint32_t op = INSTR (30, 29); + uint32_t shift = INSTR (22, 21); /* 32 bit can only shift 0 or 1 lot of 16. anything else is an unallocated instruction. */ if (size == 0 && shift > 1) HALT_UNALLOC; @@ -8814,7 +10064,7 @@ dexMoveWideImmediate (sim_cpu *cpu) if (op == 1) HALT_UNALLOC; - imm = uimm (aarch64_get_instr (cpu), 20, 5); + imm = INSTR (20, 5); if (size == 0) { @@ -8851,7 +10101,7 @@ static void ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) { unsigned rd; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rn = INSTR (9, 5); uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ @@ -8876,7 +10126,8 @@ ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) value >>= r - (s + 1); } - rd = uimm (aarch64_get_instr (cpu), 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + rd = INSTR (4, 0); aarch64_set_reg_u64 (cpu, rd, NO_SP, value); } @@ -8886,7 +10137,7 @@ static void ubfm (sim_cpu *cpu, uint32_t r, uint32_t s) { unsigned rd; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rn = INSTR (9, 5); uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); if (r <= s) @@ -8910,7 +10161,8 @@ ubfm (sim_cpu *cpu, uint32_t r, uint32_t s) value >>= r - (s + 1); } - rd = uimm (aarch64_get_instr (cpu), 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + rd = INSTR (4, 0); aarch64_set_reg_u64 (cpu, rd, NO_SP, value); } @@ -8926,7 +10178,7 @@ static void sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) { unsigned rd; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rn = INSTR (9, 5); /* as per ubfm32 but use an ASR instead of an LSR. */ int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP); @@ -8941,7 +10193,8 @@ sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) value >>= r - (s + 1); } - rd = uimm (aarch64_get_instr (cpu), 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + rd = INSTR (4, 0); aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); } @@ -8951,7 +10204,7 @@ static void sbfm (sim_cpu *cpu, uint32_t r, uint32_t s) { unsigned rd; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rn = INSTR (9, 5); /* as per ubfm but use an ASR instead of an LSR. 
*/ int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP); @@ -8966,7 +10219,8 @@ sbfm (sim_cpu *cpu, uint32_t r, uint32_t s) value >>= r - (s + 1); } - rd = uimm (aarch64_get_instr (cpu), 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + rd = INSTR (4, 0); aarch64_set_reg_s64 (cpu, rd, NO_SP, value); } @@ -8980,7 +10234,7 @@ sbfm (sim_cpu *cpu, uint32_t r, uint32_t s) static void bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rn = INSTR (9, 5); uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); uint32_t mask = -1; unsigned rd; @@ -9014,12 +10268,13 @@ bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) mask >>= r - (s + 1); } - rd = uimm (aarch64_get_instr (cpu), 4, 0); + rd = INSTR (4, 0); value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP); value2 &= ~mask; value2 |= value; + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value); } @@ -9030,7 +10285,7 @@ static void bfm (sim_cpu *cpu, uint32_t r, uint32_t s) { unsigned rd; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned rn = INSTR (9, 5); uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); uint64_t mask = 0xffffffffffffffffULL; @@ -9061,7 +10316,8 @@ bfm (sim_cpu *cpu, uint32_t r, uint32_t s) mask >>= r - (s + 1); } - rd = uimm (aarch64_get_instr (cpu), 4, 0); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); + rd = INSTR (4, 0); aarch64_set_reg_u64 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value); } @@ -9081,11 +10337,11 @@ dexBitfieldImmediate (sim_cpu *cpu) /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ uint32_t dispatch; uint32_t imms; - uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); - uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22); + uint32_t size = INSTR (31, 31); + uint32_t N = INSTR (22, 22); /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */ /* or else we have an UNALLOC. */ - uint32_t immr = uimm (aarch64_get_instr (cpu), 21, 16); + uint32_t immr = INSTR (21, 16); if (~size & N) HALT_UNALLOC; @@ -9093,12 +10349,12 @@ dexBitfieldImmediate (sim_cpu *cpu) if (!size && uimm (immr, 5, 5)) HALT_UNALLOC; - imms = uimm (aarch64_get_instr (cpu), 15, 10); + imms = INSTR (15, 10); if (!size && uimm (imms, 5, 5)) HALT_UNALLOC; /* Switch on combined size and op. 
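
The move-wide and bitfield-move helpers above reduce to a handful of bit tricks. A standalone sketch of what movz64/movn64/movk64 and the r <= s (UBFX) arm of the bitfield moves compute; the helper names here are hypothetical, not the simulator's API:

#include <stdint.h>

/* MOVZ: place a 16-bit chunk; MOVN: place then invert; MOVK: replace
   one 16-bit lane and keep the rest, exactly as movk64 above does.  */
static uint64_t
movz (uint32_t imm16, unsigned hw)
{
  return (uint64_t) imm16 << (hw * 16);
}

static uint64_t
movn (uint32_t imm16, unsigned hw)
{
  return movz (imm16, hw) ^ 0xffffffffffffffffULL;
}

static uint64_t
movk (uint64_t old, uint32_t imm16, unsigned hw)
{
  uint64_t mask = 0xffffULL << (hw * 16);

  return (old & ~mask) | movz (imm16, hw);
}

/* UBFM with r <= s, i.e. UBFX: move bits [s,r] down to bit 0 with
   zero fill, via the same shift-up/shift-down pair used above.  */
static uint32_t
ubfx32 (uint32_t value, unsigned r, unsigned s)
{
  value <<= 31 - s;      /* discard the bits above s...           */
  value >>= 31 - s + r;  /* ...then the bits below r, zero-filled */
  return value;
}
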
*/ - dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + dispatch = INSTR (31, 29); switch (dispatch) { case 0: sbfm32 (cpu, immr, imms); return; @@ -9119,10 +10375,10 @@ do_EXTR_32 (sim_cpu *cpu) instr[15,10] = imms : 0xxxxx for 32 bit instr[9,5] = Rn instr[4,0] = Rd */ - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned imms = uimm (aarch64_get_instr (cpu), 15, 10) & 31; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned imms = INSTR (15, 10) & 31; + unsigned rn = INSTR ( 9, 5); + unsigned rd = INSTR ( 4, 0); uint64_t val1; uint64_t val2; @@ -9131,6 +10387,7 @@ do_EXTR_32 (sim_cpu *cpu) val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP); val2 <<= (32 - imms); + TRACE_DECODE (cpu, "emulated at line %d", __LINE__); aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2); } @@ -9142,10 +10399,10 @@ do_EXTR_64 (sim_cpu *cpu) instr[15,10] = imms instr[9,5] = Rn instr[4,0] = Rd */ - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned imms = uimm (aarch64_get_instr (cpu), 15, 10) & 63; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned imms = INSTR (15, 10) & 63; + unsigned rn = INSTR ( 9, 5); + unsigned rd = INSTR ( 4, 0); uint64_t val; val = aarch64_get_reg_u64 (cpu, rm, NO_SP); @@ -9171,11 +10428,11 @@ dexExtractImmediate (sim_cpu *cpu) /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ /* 64 bit operations must have N = 1 or else we have an UNALLOC. */ uint32_t dispatch; - uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31); - uint32_t N = uimm (aarch64_get_instr (cpu), 22, 22); + uint32_t size = INSTR (31, 31); + uint32_t N = INSTR (22, 22); /* 32 bit operations must have imms[5] = 0 or else we have an UNALLOC. */ - uint32_t imms = uimm (aarch64_get_instr (cpu), 15, 10); + uint32_t imms = INSTR (15, 10); if (size ^ N) HALT_UNALLOC; @@ -9184,7 +10441,7 @@ dexExtractImmediate (sim_cpu *cpu) HALT_UNALLOC; /* Switch on combined size and op. */ - dispatch = uimm (aarch64_get_instr (cpu), 31, 29); + dispatch = INSTR (31, 29); if (dispatch == 0) do_EXTR_32 (cpu); @@ -9251,10 +10508,9 @@ dexLoadUnscaledImmediate (sim_cpu *cpu) instr[23,22] = opc instr[20,12] = simm9 instr[9,5] = rn may be SP. */ - /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ - uint32_t V = uimm (aarch64_get_instr (cpu), 26, 26); - uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) - | uimm (aarch64_get_instr (cpu), 23, 22)); + /* unsigned rt = INSTR (4, 0); */ + uint32_t V = INSTR (26, 26); + uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); if (!V) @@ -9328,8 +10584,8 @@ dexLoadUnscaledImmediate (sim_cpu *cpu) static void ldrsb32_abs (sim_cpu *cpu, uint32_t offset) { - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int rt = INSTR (4, 0); /* The target register may not be SP but the source may be there is no scaling required for a byte load. 
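
dexLoadUnscaledImmediate above relies on simm32 to pull the signed 9-bit offset out of bits [20,12]. One way such a helper can be written (a sketch; sext_field is a hypothetical name, and the arithmetic right shift of a negative value is implementation-defined in C, though universal on the targets that matter here):

#include <stdint.h>

static int32_t
sext_field (uint32_t insn, unsigned hi, unsigned lo)
{
  int32_t v = (int32_t) (insn << (31 - hi));  /* field sign bit -> bit 31 */

  return v >> (31 - hi + lo);                 /* shift back, sign-extending */
}

/* e.g. sext_field (insn, 20, 12) recovers the simm9 offset.  */
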
*/ @@ -9343,9 +10599,9 @@ ldrsb32_abs (sim_cpu *cpu, uint32_t offset) static void ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned int rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rm = INSTR (20, 16); + unsigned int rn = INSTR (9, 5); + unsigned int rt = INSTR (4, 0); /* rn may reference SP, rm and rt must reference ZR. */ @@ -9365,8 +10621,8 @@ static void ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { uint64_t address; - unsigned int rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned int rt = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned int rn = INSTR (9, 5); + unsigned int rt = INSTR (4, 0); if (rn == rt && wb != NoWriteBack) HALT_UNALLOC; @@ -9390,8 +10646,8 @@ ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void fstrb_abs (sim_cpu *cpu, uint32_t offset) { - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = INSTR (4, 0); + unsigned rn = INSTR (9, 5); aarch64_set_mem_u8 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, @@ -9403,14 +10659,14 @@ fstrb_abs (sim_cpu *cpu, uint32_t offset) static void fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); - uint64_t displacement = OPT_SCALE (extended, 32, scaling); + uint64_t displacement = scaling == Scaled ? 
extended : 0; aarch64_set_mem_u8 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0)); @@ -9420,8 +10676,8 @@ fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void fstrh_abs (sim_cpu *cpu, uint32_t offset) { - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = INSTR (4, 0); + unsigned rn = INSTR (9, 5); aarch64_set_mem_u16 (cpu, @@ -9434,14 +10690,14 @@ fstrh_abs (sim_cpu *cpu, uint32_t offset) static void fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); - uint64_t displacement = OPT_SCALE (extended, 32, scaling); + uint64_t displacement = OPT_SCALE (extended, 16, scaling); aarch64_set_mem_u16 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0)); @@ -9451,28 +10707,28 @@ fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void fstrs_abs (sim_cpu *cpu, uint32_t offset) { - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = INSTR (4, 0); + unsigned rn = INSTR (9, 5); - aarch64_set_mem_float + aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32), - aarch64_get_FP_float (cpu, st)); + aarch64_get_vec_u32 (cpu, st, 0)); } /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ static void fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) address += offset; - aarch64_set_mem_float (cpu, address, aarch64_get_FP_float (cpu, st)); + aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0)); if (wb == Post) address += offset; @@ -9486,45 +10742,45 @@ fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 32, scaling); - aarch64_set_mem_float - (cpu, address + displacement, aarch64_get_FP_float (cpu, st)); + aarch64_set_mem_u32 + (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0)); } /* 64 bit store scaled unsigned 12 bit. 
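
The switch from aarch64_set_mem_float to aarch64_set_mem_u32 (and from aarch64_get_FP_float to aarch64_get_vec_u32) stores the raw lane bits rather than round-tripping through a host float; presumably the point is bit-exactness, since a type-punned copy preserves every payload, including NaN bits that an FP conversion could disturb. A minimal standalone illustration of the idiom:

#include <stdint.h>
#include <string.h>

/* Copy a 32-bit FP value to memory without interpreting it.  */
static void
store_f32_bits (uint8_t *mem, float f)
{
  uint32_t bits;

  memcpy (&bits, &f, sizeof bits);  /* bit-exact, no conversion */
  memcpy (mem, &bits, sizeof bits);
}
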
*/ static void fstrd_abs (sim_cpu *cpu, uint32_t offset) { - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = INSTR (4, 0); + unsigned rn = INSTR (9, 5); - aarch64_set_mem_double + aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64), - aarch64_get_FP_double (cpu, st)); + aarch64_get_vec_u64 (cpu, st, 0)); } /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ static void fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) address += offset; - aarch64_set_mem_double (cpu, address, aarch64_get_FP_double (cpu, st)); + aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0)); if (wb == Post) address += offset; @@ -9538,17 +10794,17 @@ fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); uint64_t displacement = OPT_SCALE (extended, 64, scaling); - aarch64_set_mem_double - (cpu, address + displacement, aarch64_get_FP_double (cpu, st)); + aarch64_set_mem_u64 + (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0)); } /* 128 bit store scaled unsigned 12 bit. */ @@ -9556,8 +10812,8 @@ static void fstrq_abs (sim_cpu *cpu, uint32_t offset) { FRegister a; - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); + unsigned st = INSTR (4, 0); + unsigned rn = INSTR (9, 5); uint64_t addr; aarch64_get_FP_long_double (cpu, st, & a); @@ -9571,8 +10827,8 @@ static void fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) { FRegister a; - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); if (wb != Post) @@ -9593,9 +10849,9 @@ fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) static void fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) { - unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16); - unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned st = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rm = INSTR (20, 16); + unsigned rn = INSTR (9, 5); + unsigned st = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), @@ -9611,21 +10867,22 @@ fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) static void dexLoadImmediatePrePost (sim_cpu *cpu) { - /* instr[29,24] == 111_00 - instr[21] == 0 - instr[11,10] == 00 - instr[31,30] = size - instr[26] = V + /* instr[31,30] = size + instr[29,27] = 111 + instr[26] = V + instr[25,24] = 00 instr[23,22] = opc + instr[21] = 0 instr[20,12] = simm9 - instr[11] = wb : 0 ==> Post, 1 ==> Pre - instr[9,5] = rn may be SP. 
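
All of the _wb helpers above share one addressing pattern: pre-indexed forms add the offset before the access, post-indexed forms after it, and both write the updated address back to the base register. In compact form (a sketch with hypothetical names; the real code performs the update through aarch64_set_reg_u64 on Rn):

#include <stdint.h>

typedef enum { Post, Pre, NoWriteBack } WriteBack;

static uint64_t
effective_address (uint64_t base, int64_t offset, WriteBack wb,
                   uint64_t *new_base)
{
  uint64_t ea = (wb != Post) ? base + offset : base;

  *new_base = base + offset;  /* written to Rn unless wb == NoWriteBack */
  return ea;
}
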
*/ - /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ - uint32_t V = uimm (aarch64_get_instr (cpu), 26, 26); - uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) - | uimm (aarch64_get_instr (cpu), 23, 22)); - int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); - WriteBack wb = writeback (aarch64_get_instr (cpu), 11); + instr[11] = wb : 0 ==> Post, 1 ==> Pre + instr[10] = 0 + instr[9,5] = Rn may be SP. + instr[4,0] = Rt */ + + uint32_t V = INSTR (26, 26); + uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); + int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); + WriteBack wb = INSTR (11, 11); if (!V) { @@ -9699,11 +10956,10 @@ dexLoadRegisterOffset (sim_cpu *cpu) instr[9,5] = rn instr[4,0] = rt. */ - uint32_t V = uimm (aarch64_get_instr (cpu), 26,26); - uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) - | uimm (aarch64_get_instr (cpu), 23, 22)); - Scaling scale = scaling (aarch64_get_instr (cpu), 12); - Extension extensionType = extension (aarch64_get_instr (cpu), 13); + uint32_t V = INSTR (26, 26); + uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); + Scaling scale = INSTR (12, 12); + Extension extensionType = INSTR (15, 13); /* Check for illegal extension types. */ if (uimm (extensionType, 1, 1) == 0) HALT_UNALLOC; @@ -9770,17 +11026,17 @@ dexLoadRegisterOffset (sim_cpu *cpu) static void dexLoadUnsignedImmediate (sim_cpu *cpu) { - /* assert instr[29,24] == 111_01 + /* instr[29,24] == 111_01 instr[31,30] = size - instr[26] = V + instr[26] = V instr[23,22] = opc instr[21,10] = uimm12 : unsigned immediate offset - instr[9,5] = rn may be SP. */ - /* unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0); */ - uint32_t V = uimm (aarch64_get_instr (cpu), 26,26); - uint32_t dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 30) << 2) - | uimm (aarch64_get_instr (cpu), 23, 22)); - uint32_t imm = uimm (aarch64_get_instr (cpu), 21, 10); + instr[9,5] = rn may be SP. + instr[4,0] = rt. */ + + uint32_t V = INSTR (26,26); + uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); + uint32_t imm = INSTR (21, 10); if (!V) { @@ -9812,19 +11068,17 @@ dexLoadUnsignedImmediate (sim_cpu *cpu) /* FReg operations. */ switch (dispatch) { - case 3: fldrq_abs (cpu, imm); return; - case 9: fldrs_abs (cpu, imm); return; - case 13: fldrd_abs (cpu, imm); return; - case 0: fstrb_abs (cpu, imm); return; - case 2: fstrq_abs (cpu, imm); return; case 4: fstrh_abs (cpu, imm); return; case 8: fstrs_abs (cpu, imm); return; case 12: fstrd_abs (cpu, imm); return; + case 2: fstrq_abs (cpu, imm); return; - case 1: /* LDR 8 bit FP. */ - case 5: /* LDR 8 bit FP. */ - HALT_NYI; + case 1: fldrb_abs (cpu, imm); return; + case 5: fldrh_abs (cpu, imm); return; + case 9: fldrs_abs (cpu, imm); return; + case 13: fldrd_abs (cpu, imm); return; + case 3: fldrq_abs (cpu, imm); return; default: case 6: @@ -9851,7 +11105,7 @@ dexLoadExclusive (sim_cpu *cpu) instr[9,5] = Rn instr[4,0] = Rt. */ - switch (uimm (aarch64_get_instr (cpu), 22, 21)) + switch (INSTR (22, 21)) { case 2: ldxr (cpu); return; case 0: stxr (cpu); return; @@ -9867,14 +11121,13 @@ dexLoadOther (sim_cpu *cpu) /* instr[29,25] = 111_0 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate instr[21:11,10] is the secondary dispatch. 
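
Two recurring decode idioms above, in isolation (the values mirror the code; the helper names are hypothetical): the register-offset forms reject extension encodings whose bit 1 is clear, and each loader dispatches on a size:opc key.

/* Legal index extensions are UXTW (010), UXTX (011), SXTW (110) and
   SXTX (111): bit 1 of the 3-bit field must be set, which is what
   the uimm (extensionType, 1, 1) check above enforces.  */
static int
extension_is_legal (unsigned ext3)
{
  return (ext3 >> 1) & 1;
}

/* The load/store dispatch key used above: instr[31,30]:instr[23,22].  */
static unsigned
ldst_dispatch (unsigned size, unsigned opc)
{
  return (size << 2) | opc;   /* e.g. size 3, opc 1 -> 13 -> fldrd_abs */
}
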
*/ - if (uimm (aarch64_get_instr (cpu), 24, 24)) + if (INSTR (24, 24)) { dexLoadUnsignedImmediate (cpu); return; } - dispatch = ( (uimm (aarch64_get_instr (cpu), 21, 21) << 2) - | uimm (aarch64_get_instr (cpu), 11, 10)); + dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10)); switch (dispatch) { case 0: dexLoadUnscaledImmediate (cpu); return; @@ -9894,9 +11147,9 @@ dexLoadOther (sim_cpu *cpu) static void store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); if ((rn == rd || rm == rd) && wb != NoWriteBack) @@ -9922,9 +11175,9 @@ store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) static void store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); if ((rn == rd || rm == rd) && wb != NoWriteBack) @@ -9936,9 +11189,9 @@ store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) address += offset; aarch64_set_mem_u64 (cpu, address, - aarch64_get_reg_u64 (cpu, rm, SP_OK)); + aarch64_get_reg_u64 (cpu, rm, NO_SP)); aarch64_set_mem_u64 (cpu, address + 8, - aarch64_get_reg_u64 (cpu, rn, SP_OK)); + aarch64_get_reg_u64 (cpu, rn, NO_SP)); if (wb == Post) address += offset; @@ -9950,12 +11203,12 @@ store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) static void load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); - /* treat this as unalloc to make sure we don't do it. */ + /* Treat this as unalloc to make sure we don't do it. */ if (rn == rm) HALT_UNALLOC; @@ -9977,9 +11230,9 @@ load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) static void load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); /* Treat this as unalloc to make sure we don't do it. */ @@ -10004,9 +11257,9 @@ load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb) static void load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); /* Treat this as unalloc to make sure we don't do it. */ @@ -10041,8 +11294,7 @@ dex_load_store_pair_gr (sim_cpu *cpu) instr[ 9, 5] = Rd instr[ 4, 0] = Rm. 
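
The pair helpers above put the first data register at the base address and the second one access-size higher, and reject the UNPREDICTABLE encodings: writeback when the base equals a data register, and loads where both destinations are the same register. (Note the sim's local naming: rd here is the base register, rm and rn are the two data registers.) A store sketch, assuming a flat byte-array memory:

#include <stdint.h>
#include <string.h>

/* STP Xt, Xt2, [addr] in miniature: Xt at [addr], Xt2 at [addr + 8].  */
static void
store_pair_u64_sketch (uint8_t *mem, uint64_t addr,
                       uint64_t xt, uint64_t xt2)
{
  memcpy (mem + addr, &xt, sizeof xt);
  memcpy (mem + addr + 8, &xt2, sizeof xt2);
}
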
*/ - uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 3) - | uimm (aarch64_get_instr (cpu), 24, 22)); + uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22)); int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); switch (dispatch) @@ -10073,9 +11325,9 @@ dex_load_store_pair_gr (sim_cpu *cpu) static void store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); offset <<= 2; @@ -10083,8 +11335,8 @@ store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_mem_float (cpu, address, aarch64_get_FP_float (cpu, rm)); - aarch64_set_mem_float (cpu, address + 4, aarch64_get_FP_float (cpu, rn)); + aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0)); + aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0)); if (wb == Post) address += offset; @@ -10096,9 +11348,9 @@ store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) static void store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); offset <<= 3; @@ -10106,8 +11358,8 @@ store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_mem_double (cpu, address, aarch64_get_FP_double (cpu, rm)); - aarch64_set_mem_double (cpu, address + 8, aarch64_get_FP_double (cpu, rn)); + aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0)); + aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0)); if (wb == Post) address += offset; @@ -10120,9 +11372,9 @@ static void store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) { FRegister a; - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); offset <<= 4; @@ -10145,9 +11397,9 @@ store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) static void load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); if (rm == rn) @@ -10158,8 +11410,8 @@ load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_FP_float (cpu, rm, aarch64_get_mem_float (cpu, address)); - aarch64_set_FP_float (cpu, rn, aarch64_get_mem_float (cpu, address + 4)); + aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address)); + aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4)); if (wb == Post) address += offset; @@ -10171,9 +11423,9 @@ load_pair_float 
(sim_cpu *cpu, int32_t offset, WriteBack wb) static void load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) { - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); if (rm == rn) @@ -10184,8 +11436,8 @@ load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) if (wb != Post) address += offset; - aarch64_set_FP_double (cpu, rm, aarch64_get_mem_double (cpu, address)); - aarch64_set_FP_double (cpu, rn, aarch64_get_mem_double (cpu, address + 8)); + aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address)); + aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8)); if (wb == Post) address += offset; @@ -10198,9 +11450,9 @@ static void load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) { FRegister a; - unsigned rn = uimm (aarch64_get_instr (cpu), 14, 10); - unsigned rd = uimm (aarch64_get_instr (cpu), 9, 5); - unsigned rm = uimm (aarch64_get_instr (cpu), 4, 0); + unsigned rn = INSTR (14, 10); + unsigned rd = INSTR (9, 5); + unsigned rm = INSTR (4, 0); uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); if (rm == rn) @@ -10235,8 +11487,7 @@ dex_load_store_pair_fp (sim_cpu *cpu) instr[ 9, 5] = Rd instr[ 4, 0] = Rm */ - uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 3) - | uimm (aarch64_get_instr (cpu), 24, 22)); + uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22)); int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); switch (dispatch) @@ -10273,603 +11524,466 @@ vec_reg (unsigned v, unsigned o) return (v + o) & 0x3F; } -/* Load multiple N-element structures to N consecutive registers. */ +/* Load multiple N-element structures to M consecutive registers. */ static void -vec_load (sim_cpu *cpu, uint64_t address, unsigned N) +vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M) { - int all = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned i; + int all = INSTR (30, 30); + unsigned size = INSTR (11, 10); + unsigned vd = INSTR (4, 0); + unsigned rpt = (N == M) ? 1 : M; + unsigned selem = N; + unsigned i, j, k; switch (size) { case 0: /* 8-bit operations. */ - if (all) - for (i = 0; i < (16 * N); i++) - aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15, - aarch64_get_mem_u8 (cpu, address + i)); - else - for (i = 0; i < (8 * N); i++) - aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7, - aarch64_get_mem_u8 (cpu, address + i)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (8 + (8 * all)); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j, + aarch64_get_mem_u8 (cpu, address)); + address += 1; + } return; case 1: /* 16-bit operations. */ - if (all) - for (i = 0; i < (8 * N); i++) - aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7, - aarch64_get_mem_u16 (cpu, address + i * 2)); - else - for (i = 0; i < (4 * N); i++) - aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3, - aarch64_get_mem_u16 (cpu, address + i * 2)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (4 + (4 * all)); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j, + aarch64_get_mem_u16 (cpu, address)); + address += 2; + } return; case 2: /* 32-bit operations. 
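
The rewritten vec_load walks memory strictly in ascending element order: for each repetition, for each lane, one element per register of the group. That single loop nest is what lets the same routine serve both the interleaving LDn forms and the multi-register LD1 forms. A standalone trace of the fill order (byte case, hypothetical harness):

#include <stdio.h>

static void
ldn_order (int rpt, int lanes, int selem)
{
  int addr = 0;
  int i, j, k;

  for (i = 0; i < rpt; i++)
    for (j = 0; j < lanes; j++)
      for (k = 0; k < selem; k++)
        printf ("V%d.B[%d] <- mem[%d]\n", i + k, j, addr++);
}

/* ldn_order (1, 8, 2) shows LD2's de-interleave across two registers;
   ldn_order (2, 8, 1) shows LD1 {Va.8B, Vb.8B} filling back to back.  */
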
*/ - if (all) - for (i = 0; i < (4 * N); i++) - aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3, - aarch64_get_mem_u32 (cpu, address + i * 4)); - else - for (i = 0; i < (2 * N); i++) - aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1, - aarch64_get_mem_u32 (cpu, address + i * 4)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (2 + (2 * all)); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j, + aarch64_get_mem_u32 (cpu, address)); + address += 4; + } return; case 3: /* 64-bit operations. */ - if (all) - for (i = 0; i < (2 * N); i++) - aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1, - aarch64_get_mem_u64 (cpu, address + i * 8)); - else - for (i = 0; i < N; i++) - aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0, - aarch64_get_mem_u64 (cpu, address + i * 8)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (1 + all); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j, + aarch64_get_mem_u64 (cpu, address)); + address += 8; + } return; - - default: - HALT_UNREACHABLE; } } -/* LD4: load multiple 4-element to four consecutive registers. */ +/* Load multiple 4-element structures into four consecutive registers. */ static void LD4 (sim_cpu *cpu, uint64_t address) { - vec_load (cpu, address, 4); + vec_load (cpu, address, 4, 4); } -/* LD3: load multiple 3-element structures to three consecutive registers. */ +/* Load multiple 3-element structures into three consecutive registers. */ static void LD3 (sim_cpu *cpu, uint64_t address) { - vec_load (cpu, address, 3); + vec_load (cpu, address, 3, 3); } -/* LD2: load multiple 2-element structures to two consecutive registers. */ +/* Load multiple 2-element structures into two consecutive registers. */ static void LD2 (sim_cpu *cpu, uint64_t address) { - vec_load (cpu, address, 2); + vec_load (cpu, address, 2, 2); } /* Load multiple 1-element structures into one register. */ static void LD1_1 (sim_cpu *cpu, uint64_t address) { - int all = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned i; - - switch (size) - { - case 0: - /* LD1 {Vd.16b}, addr, #16 */ - /* LD1 {Vd.8b}, addr, #8 */ - for (i = 0; i < (all ? 16 : 8); i++) - aarch64_set_vec_u8 (cpu, vd, i, - aarch64_get_mem_u8 (cpu, address + i)); - return; - - case 1: - /* LD1 {Vd.8h}, addr, #16 */ - /* LD1 {Vd.4h}, addr, #8 */ - for (i = 0; i < (all ? 8 : 4); i++) - aarch64_set_vec_u16 (cpu, vd, i, - aarch64_get_mem_u16 (cpu, address + i * 2)); - return; - - case 2: - /* LD1 {Vd.4s}, addr, #16 */ - /* LD1 {Vd.2s}, addr, #8 */ - for (i = 0; i < (all ? 4 : 2); i++) - aarch64_set_vec_u32 (cpu, vd, i, - aarch64_get_mem_u32 (cpu, address + i * 4)); - return; - - case 3: - /* LD1 {Vd.2d}, addr, #16 */ - /* LD1 {Vd.1d}, addr, #8 */ - for (i = 0; i < (all ? 2 : 1); i++) - aarch64_set_vec_u64 (cpu, vd, i, - aarch64_get_mem_u64 (cpu, address + i * 8)); - return; - - default: - HALT_UNREACHABLE; - } + vec_load (cpu, address, 1, 1); } /* Load multiple 1-element structures into two registers. */ static void LD1_2 (sim_cpu *cpu, uint64_t address) { - /* FIXME: This algorithm is *exactly* the same as the LD2 version. - So why have two different instructions ? There must be something - wrong somewhere. */ - vec_load (cpu, address, 2); + vec_load (cpu, address, 1, 2); } /* Load multiple 1-element structures into three registers. 
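
The new (N, M) parameters are what resolve the old FIXMEs: LD2 and LD1-with-two-registers reach vec_load with the same register count but different shapes. The mapping, mirrored from the code above into a standalone sketch:

struct ldn_shape { unsigned rpt, selem; };

static struct ldn_shape
ldn_shape (unsigned N, unsigned M)
{
  struct ldn_shape s;

  s.rpt = (N == M) ? 1 : M;  /* LD2 -> one pass; LD1 {Va,Vb} -> two */
  s.selem = N;               /* structure members read per element  */
  return s;
}
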
*/ static void LD1_3 (sim_cpu *cpu, uint64_t address) { - /* FIXME: This algorithm is *exactly* the same as the LD3 version. - So why have two different instructions ? There must be something - wrong somewhere. */ - vec_load (cpu, address, 3); + vec_load (cpu, address, 1, 3); } /* Load multiple 1-element structures into four registers. */ static void LD1_4 (sim_cpu *cpu, uint64_t address) { - /* FIXME: This algorithm is *exactly* the same as the LD4 version. - So why have two different instructions ? There must be something - wrong somewhere. */ - vec_load (cpu, address, 4); + vec_load (cpu, address, 1, 4); } -/* Store multiple N-element structures to N consecutive registers. */ +/* Store multiple N-element structures from M consecutive registers. */ static void -vec_store (sim_cpu *cpu, uint64_t address, unsigned N) +vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M) { - int all = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned i; + int all = INSTR (30, 30); + unsigned size = INSTR (11, 10); + unsigned vd = INSTR (4, 0); + unsigned rpt = (N == M) ? 1 : M; + unsigned selem = N; + unsigned i, j, k; switch (size) { case 0: /* 8-bit operations. */ - if (all) - for (i = 0; i < (16 * N); i++) - aarch64_set_mem_u8 - (cpu, address + i, - aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15)); - else - for (i = 0; i < (8 * N); i++) - aarch64_set_mem_u8 - (cpu, address + i, - aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (8 + (8 * all)); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_mem_u8 + (cpu, address, + aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j)); + address += 1; + } return; case 1: /* 16-bit operations. */ - if (all) - for (i = 0; i < (8 * N); i++) - aarch64_set_mem_u16 - (cpu, address + i * 2, - aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7)); - else - for (i = 0; i < (4 * N); i++) - aarch64_set_mem_u16 - (cpu, address + i * 2, - aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (4 + (4 * all)); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_mem_u16 + (cpu, address, + aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j)); + address += 2; + } return; case 2: /* 32-bit operations. */ - if (all) - for (i = 0; i < (4 * N); i++) - aarch64_set_mem_u32 - (cpu, address + i * 4, - aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3)); - else - for (i = 0; i < (2 * N); i++) - aarch64_set_mem_u32 - (cpu, address + i * 4, - aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (2 + (2 * all)); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_mem_u32 + (cpu, address, + aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j)); + address += 4; + } return; case 3: /* 64-bit operations. */ - if (all) - for (i = 0; i < (2 * N); i++) - aarch64_set_mem_u64 - (cpu, address + i * 8, - aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1)); - else - for (i = 0; i < N; i++) - aarch64_set_mem_u64 - (cpu, address + i * 8, - aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0)); + for (i = 0; i < rpt; i++) + for (j = 0; j < (1 + all); j++) + for (k = 0; k < selem; k++) + { + aarch64_set_mem_u64 + (cpu, address, + aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j)); + address += 8; + } return; - - default: - HALT_UNREACHABLE; } } -/* Store multiple 4-element structure to four consecutive registers. 
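
The deleted default: HALT_UNREACHABLE arms really were dead code: size comes from the two-bit field instr[11,10], so cases 0..3 are exhaustive. The shape the dispatch now takes, in miniature:

static void
dispatch_size (unsigned size)
{
  switch (size & 3)   /* two bits: no other value can occur */
    {
    case 0: /* bytes */       break;
    case 1: /* halfwords */   break;
    case 2: /* words */       break;
    case 3: /* doublewords */ break;
    }
}
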
*/ +/* Store multiple 4-element structure from four consecutive registers. */ static void ST4 (sim_cpu *cpu, uint64_t address) { - vec_store (cpu, address, 4); + vec_store (cpu, address, 4, 4); } -/* Store multiple 3-element structures to three consecutive registers. */ +/* Store multiple 3-element structures from three consecutive registers. */ static void ST3 (sim_cpu *cpu, uint64_t address) { - vec_store (cpu, address, 3); + vec_store (cpu, address, 3, 3); } -/* Store multiple 2-element structures to two consecutive registers. */ +/* Store multiple 2-element structures from two consecutive registers. */ static void ST2 (sim_cpu *cpu, uint64_t address) { - vec_store (cpu, address, 2); + vec_store (cpu, address, 2, 2); } -/* Store multiple 1-element structures into one register. */ +/* Store multiple 1-element structures from one register. */ static void ST1_1 (sim_cpu *cpu, uint64_t address) { - int all = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned i; - - switch (size) - { - case 0: - for (i = 0; i < (all ? 16 : 8); i++) - aarch64_set_mem_u8 (cpu, address + i, - aarch64_get_vec_u8 (cpu, vd, i)); - return; - - case 1: - for (i = 0; i < (all ? 8 : 4); i++) - aarch64_set_mem_u16 (cpu, address + i * 2, - aarch64_get_vec_u16 (cpu, vd, i)); - return; - - case 2: - for (i = 0; i < (all ? 4 : 2); i++) - aarch64_set_mem_u32 (cpu, address + i * 4, - aarch64_get_vec_u32 (cpu, vd, i)); - return; - - case 3: - for (i = 0; i < (all ? 2 : 1); i++) - aarch64_set_mem_u64 (cpu, address + i * 8, - aarch64_get_vec_u64 (cpu, vd, i)); - return; - - default: - HALT_UNREACHABLE; - } + vec_store (cpu, address, 1, 1); } -/* Store multiple 1-element structures into two registers. */ +/* Store multiple 1-element structures from two registers. */ static void ST1_2 (sim_cpu *cpu, uint64_t address) { - /* FIXME: This algorithm is *exactly* the same as the ST2 version. - So why have two different instructions ? There must be - something wrong somewhere. */ - vec_store (cpu, address, 2); + vec_store (cpu, address, 1, 2); } -/* Store multiple 1-element structures into three registers. */ +/* Store multiple 1-element structures from three registers. */ static void ST1_3 (sim_cpu *cpu, uint64_t address) { - /* FIXME: This algorithm is *exactly* the same as the ST3 version. - So why have two different instructions ? There must be - something wrong somewhere. */ - vec_store (cpu, address, 3); + vec_store (cpu, address, 1, 3); } -/* Store multiple 1-element structures into four registers. */ +/* Store multiple 1-element structures from four registers. */ static void ST1_4 (sim_cpu *cpu, uint64_t address) { - /* FIXME: This algorithm is *exactly* the same as the ST4 version. - So why have two different instructions ? There must be - something wrong somewhere. 
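
The LDn_STn_SINGLE_LANE_AND_SIZE macro defined just below recovers the lane number by re-packing Q:S:size; how many of those bits belong to the lane index depends on the element width encoded in instr[15,14]. The byte case in isolation (a sketch of the macro's case 0):

/* Byte-wide single-structure forms: Q, S and both size bits all
   select the lane, giving lanes 0..15 on the full-width vector.  */
static unsigned
lane_of_byte_form (unsigned q, unsigned s, unsigned size2)
{
  return (q << 3) | (s << 2) | size2;
}
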
*/ - vec_store (cpu, address, 4); -} + vec_store (cpu, address, 1, 4); +} + +#define LDn_STn_SINGLE_LANE_AND_SIZE() \ + do \ + { \ + switch (INSTR (15, 14)) \ + { \ + case 0: \ + lane = (full << 3) | (s << 2) | size; \ + size = 0; \ + break; \ + \ + case 1: \ + if ((size & 1) == 1) \ + HALT_UNALLOC; \ + lane = (full << 2) | (s << 1) | (size >> 1); \ + size = 1; \ + break; \ + \ + case 2: \ + if ((size & 2) == 2) \ + HALT_UNALLOC; \ + \ + if ((size & 1) == 0) \ + { \ + lane = (full << 1) | s; \ + size = 2; \ + } \ + else \ + { \ + if (s) \ + HALT_UNALLOC; \ + lane = full; \ + size = 3; \ + } \ + break; \ + \ + default: \ + HALT_UNALLOC; \ + } \ + } \ + while (0) +/* Load single structure into one lane of N registers. */ static void -do_vec_LDnR (sim_cpu *cpu, uint64_t address) +do_vec_LDn_single (sim_cpu *cpu, uint64_t address) { /* instr[31] = 0 instr[30] = element selector 0=>half, 1=>all elements instr[29,24] = 00 1101 instr[23] = 0=>simple, 1=>post instr[22] = 1 - instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1) + instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1) instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), 11111 (immediate post inc) - instr[15,14] = 11 - instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1) - instr[12] = 0 - instr[11,10] = element size 00=> byte(b), 01=> half(h), - 10=> word(s), 11=> double(d) + instr[15,13] = opcode + instr[12] = S, used for lane number + instr[11,10] = size, also used for lane number instr[9,5] = address instr[4,0] = Vd */ - unsigned full = uimm (aarch64_get_instr (cpu), 30, 30); - unsigned vd = uimm (aarch64_get_instr (cpu), 4, 0); - unsigned size = uimm (aarch64_get_instr (cpu), 11, 10); + unsigned full = INSTR (30, 30); + unsigned vd = INSTR (4, 0); + unsigned size = INSTR (11, 10); + unsigned s = INSTR (12, 12); + int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; + int lane = 0; int i; NYI_assert (29, 24, 0x0D); NYI_assert (22, 22, 1); - NYI_assert (15, 14, 3); - NYI_assert (12, 12, 0); - - switch ((uimm (aarch64_get_instr (cpu), 13, 13) << 1) - | uimm (aarch64_get_instr (cpu), 21, 21)) - { - case 0: /* LD1R. */ - switch (size) - { - case 0: - { - uint8_t val = aarch64_get_mem_u8 (cpu, address); - for (i = 0; i < (full ? 16 : 8); i++) - aarch64_set_vec_u8 (cpu, vd, i, val); - break; - } - - case 1: - { - uint16_t val = aarch64_get_mem_u16 (cpu, address); - for (i = 0; i < (full ? 8 : 4); i++) - aarch64_set_vec_u16 (cpu, vd, i, val); - break; - } - - case 2: - { - uint32_t val = aarch64_get_mem_u32 (cpu, address); - for (i = 0; i < (full ? 4 : 2); i++) - aarch64_set_vec_u32 (cpu, vd, i, val); - break; - } - case 3: - { - uint64_t val = aarch64_get_mem_u64 (cpu, address); - for (i = 0; i < (full ? 2 : 1); i++) - aarch64_set_vec_u64 (cpu, vd, i, val); - break; - } + /* Compute the lane number first (using size), and then compute size. */ + LDn_STn_SINGLE_LANE_AND_SIZE (); - default: - HALT_UNALLOC; + for (i = 0; i < nregs; i++) + switch (size) + { + case 0: + { + uint8_t val = aarch64_get_mem_u8 (cpu, address + i); + aarch64_set_vec_u8 (cpu, vd + i, lane, val); + break; } - break; - case 1: /* LD2R. */ - switch (size) + case 1: { - case 0: - { - uint8_t val1 = aarch64_get_mem_u8 (cpu, address); - uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1); - - for (i = 0; i < (full ? 
16 : 8); i++) - { - aarch64_set_vec_u8 (cpu, vd, 0, val1); - aarch64_set_vec_u8 (cpu, vd + 1, 0, val2); - } - break; - } - - case 1: - { - uint16_t val1 = aarch64_get_mem_u16 (cpu, address); - uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2); - - for (i = 0; i < (full ? 8 : 4); i++) - { - aarch64_set_vec_u16 (cpu, vd, 0, val1); - aarch64_set_vec_u16 (cpu, vd + 1, 0, val2); - } - break; - } - - case 2: - { - uint32_t val1 = aarch64_get_mem_u32 (cpu, address); - uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4); - - for (i = 0; i < (full ? 4 : 2); i++) - { - aarch64_set_vec_u32 (cpu, vd, 0, val1); - aarch64_set_vec_u32 (cpu, vd + 1, 0, val2); - } - break; - } - - case 3: - { - uint64_t val1 = aarch64_get_mem_u64 (cpu, address); - uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8); - - for (i = 0; i < (full ? 2 : 1); i++) - { - aarch64_set_vec_u64 (cpu, vd, 0, val1); - aarch64_set_vec_u64 (cpu, vd + 1, 0, val2); - } - break; - } - - default: - HALT_UNALLOC; + uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2)); + aarch64_set_vec_u16 (cpu, vd + i, lane, val); + break; } - break; - case 2: /* LD3R. */ - switch (size) + case 2: { - case 0: - { - uint8_t val1 = aarch64_get_mem_u8 (cpu, address); - uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1); - uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2); - - for (i = 0; i < (full ? 16 : 8); i++) - { - aarch64_set_vec_u8 (cpu, vd, 0, val1); - aarch64_set_vec_u8 (cpu, vd + 1, 0, val2); - aarch64_set_vec_u8 (cpu, vd + 2, 0, val3); - } - } + uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4)); + aarch64_set_vec_u32 (cpu, vd + i, lane, val); break; + } - case 1: - { - uint32_t val1 = aarch64_get_mem_u16 (cpu, address); - uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2); - uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4); - - for (i = 0; i < (full ? 8 : 4); i++) - { - aarch64_set_vec_u16 (cpu, vd, 0, val1); - aarch64_set_vec_u16 (cpu, vd + 1, 0, val2); - aarch64_set_vec_u16 (cpu, vd + 2, 0, val3); - } - } + case 3: + { + uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8)); + aarch64_set_vec_u64 (cpu, vd + i, lane, val); break; + } + } +} - case 2: - { - uint32_t val1 = aarch64_get_mem_u32 (cpu, address); - uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4); - uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8); +/* Store single structure from one lane from N registers. */ +static void +do_vec_STn_single (sim_cpu *cpu, uint64_t address) +{ + /* instr[31] = 0 + instr[30] = element selector 0=>half, 1=>all elements + instr[29,24] = 00 1101 + instr[23] = 0=>simple, 1=>post + instr[22] = 0 + instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1) + instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), + 11111 (immediate post inc) + instr[15,13] = opcode + instr[12] = S, used for lane number + instr[11,10] = size, also used for lane number + instr[9,5] = address + instr[4,0] = Vd */ - for (i = 0; i < (full ? 
+/* Load single structure into all lanes of N registers.  */
+static void
+do_vec_LDnR (sim_cpu *cpu, uint64_t address)
+{
+  /* instr[31]    = 0
+     instr[30]    = element selector 0=>half, 1=>all elements
+     instr[29,24] = 00 1101
+     instr[23]    = 0=>simple, 1=>post
+     instr[22]    = 1
+     instr[21]    = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
+     instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
+                    11111 (immediate post inc)
+     instr[15,14] = 11
+     instr[13]    = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
+     instr[12]    = 0
+     instr[11,10] = element size 00=> byte(b), 01=> half(h),
+                                 10=> word(s), 11=> double(d)
+     instr[9,5]   = address
+     instr[4,0]   = Vd  */
+
+  unsigned full = INSTR (30, 30);
+  unsigned vd = INSTR (4, 0);
+  unsigned size = INSTR (11, 10);
+  int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
+  int i, n;
+
+  NYI_assert (29, 24, 0x0D);
+  NYI_assert (22, 22, 1);
+  NYI_assert (15, 14, 3);
+  NYI_assert (12, 12, 0);
+
+  for (n = 0; n < nregs; n++)
+    switch (size)
+      {
+      case 0:
+        {
+          uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
+          for (i = 0; i < (full ? 16 : 8); i++)
+            aarch64_set_vec_u8 (cpu, vd + n, i, val);
+          break;
+        }
+
+      case 1:
+        {
+          uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
+          for (i = 0; i < (full ? 8 : 4); i++)
+            aarch64_set_vec_u16 (cpu, vd + n, i, val);
+          break;
+        }
+
+      case 2:
+        {
+          uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
+          for (i = 0; i < (full ? 4 : 2); i++)
+            aarch64_set_vec_u32 (cpu, vd + n, i, val);
+          break;
+        }
+
+      case 3:
+        {
+          uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
+          for (i = 0; i < (full ? 2 : 1); i++)
+            aarch64_set_vec_u64 (cpu, vd + n, i, val);
+          break;
+        }
+
+      default:
+        HALT_UNALLOC;
+      }
+}
 
 static void
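/* Aside (editorial sketch, not part of the patch): the LDnR forms read one
   element per register and replicate it across every lane, e.g. LD2R
   {v0.4s, v1.4s} reads 8 bytes and fills both registers.  Plain C
   rendering with illustrative data:  */

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t mem[2] = { 0xdeadbeef, 0xcafef00d };
  uint32_t v[2][4];  /* two vector registers, four word lanes each.  */
  int n, i;

  for (n = 0; n < 2; n++)      /* nregs = 2 for LD2R.  */
    for (i = 0; i < 4; i++)    /* full => 4 word lanes.  */
      v[n][i] = mem[n];        /* element at address + n * 4, replicated.  */

  printf ("%08x %08x\n", v[0][3], v[1][0]); /* deadbeef cafef00d */
  return 0;
}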
@@ -10880,7 +11994,7 @@ do_vec_load_store (sim_cpu *cpu)
      instr[31]    = 0
      instr[30]    = element selector 0=>half, 1=>all elements
      instr[29,25] = 00110
-     instr[24]    = ?
+     instr[24]    = 0=>multiple struct, 1=>single struct
      instr[23]    = 0=>simple, 1=>post
      instr[22]    = 0=>store, 1=>load
      instr[21]    = 0 (LDn) / small(0)-large(1) selector (LDnR)
@@ -10908,62 +12022,105 @@ do_vec_load_store (sim_cpu *cpu)
      instr[9,5]   = Vn, can be SP
      instr[4,0]   = Vd  */
 
+  int single;
   int post;
   int load;
   unsigned vn;
   uint64_t address;
   int type;
 
-  if (uimm (aarch64_get_instr (cpu), 31, 31) != 0
-      || uimm (aarch64_get_instr (cpu), 29, 25) != 0x06)
+  if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
     HALT_NYI;
 
-  type = uimm (aarch64_get_instr (cpu), 15, 12);
-  if (type != 0xE && type != 0xE && uimm (aarch64_get_instr (cpu), 21, 21) != 0)
-    HALT_NYI;
-
-  post = uimm (aarch64_get_instr (cpu), 23, 23);
-  load = uimm (aarch64_get_instr (cpu), 22, 22);
-  vn = uimm (aarch64_get_instr (cpu), 9, 5);
+  single = INSTR (24, 24);
+  post = INSTR (23, 23);
+  load = INSTR (22, 22);
+  type = INSTR (15, 12);
+  vn = INSTR (9, 5);
   address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
 
+  if (! single && INSTR (21, 21) != 0)
+    HALT_UNALLOC;
+
   if (post)
     {
-      unsigned vm = uimm (aarch64_get_instr (cpu), 20, 16);
+      unsigned vm = INSTR (20, 16);
 
       if (vm == R31)
        {
         unsigned sizeof_operation;
 
-        switch (type)
-          {
-          case 0: sizeof_operation = 32; break;
-          case 4: sizeof_operation = 24; break;
-          case 8: sizeof_operation = 16; break;
-
-          case 0xC:
-            sizeof_operation = uimm (aarch64_get_instr (cpu), 21, 21) ? 2 : 1;
-            sizeof_operation <<= uimm (aarch64_get_instr (cpu), 11, 10);
-            break;
-
-          case 0xE:
-            sizeof_operation = uimm (aarch64_get_instr (cpu), 21, 21) ? 8 : 4;
-            sizeof_operation <<= uimm (aarch64_get_instr (cpu), 11, 10);
-            break;
-
-          case 2:
-          case 6:
-          case 10:
-          case 7:
-            sizeof_operation = 2 << uimm (aarch64_get_instr (cpu), 11, 10);
-            break;
-
-          default:
-            HALT_UNALLOC;
-          }
-
-        if (uimm (aarch64_get_instr (cpu), 30, 30))
-          sizeof_operation *= 2;
+        if (single)
+          {
+            if ((type >= 0) && (type <= 11))
+              {
+                int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
+                switch (INSTR (15, 14))
+                  {
+                  case 0:
+                    sizeof_operation = nregs * 1;
+                    break;
+                  case 1:
+                    sizeof_operation = nregs * 2;
+                    break;
+                  case 2:
+                    if (INSTR (10, 10) == 0)
+                      sizeof_operation = nregs * 4;
+                    else
+                      sizeof_operation = nregs * 8;
+                    break;
+                  default:
+                    HALT_UNALLOC;
+                  }
+              }
+            else if (type == 0xC)
+              {
+                sizeof_operation = INSTR (21, 21) ? 2 : 1;
+                sizeof_operation <<= INSTR (11, 10);
+              }
+            else if (type == 0xE)
+              {
+                sizeof_operation = INSTR (21, 21) ? 4 : 3;
+                sizeof_operation <<= INSTR (11, 10);
+              }
+            else
+              HALT_UNALLOC;
+          }
+        else
+          {
+            switch (type)
+              {
+              case 0: sizeof_operation = 32; break;
+              case 4: sizeof_operation = 24; break;
+              case 8: sizeof_operation = 16; break;
+
+              case 7:
+                /* One register, immediate offset variant.  */
+                sizeof_operation = 8;
+                break;
+
+              case 10:
+                /* Two registers, immediate offset variant.  */
+                sizeof_operation = 16;
+                break;
+
+              case 6:
+                /* Three registers, immediate offset variant.  */
+                sizeof_operation = 24;
+                break;
+
+              case 2:
+                /* Four registers, immediate offset variant.  */
+                sizeof_operation = 32;
+                break;
+
+              default:
+                HALT_UNALLOC;
+              }
+
+            if (INSTR (30, 30))
+              sizeof_operation *= 2;
+          }
 
         aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
        }
@@ -10976,6 +12133,29 @@ do_vec_load_store (sim_cpu *cpu)
       NYI_assert (20, 16, 0);
     }
 
+  if (single)
+    {
+      if (load)
+       {
+         if ((type >= 0) && (type <= 11))
+           do_vec_LDn_single (cpu, address);
+         else if ((type == 0xC) || (type == 0xE))
+           do_vec_LDnR (cpu, address);
+         else
+           HALT_UNALLOC;
+         return;
+       }
+
+      /* Stores.  */
+      if ((type >= 0) && (type <= 11))
+       {
+         do_vec_STn_single (cpu, address);
+         return;
+       }
+
+      HALT_UNALLOC;
+    }
+
   if (load)
     {
       switch (type)
@@ -10988,11 +12168,8 @@ do_vec_load_store (sim_cpu *cpu)
        case 10: LD1_2 (cpu, address); return;
        case 7:  LD1_1 (cpu, address); return;
 
-       case 0xE:
-       case 0xC: do_vec_LDnR (cpu, address); return;
-
        default:
-         HALT_NYI;
+         HALT_UNALLOC;
        }
     }
 
@@ -11007,7 +12184,7 @@ do_vec_load_store (sim_cpu *cpu)
        case 10: ST1_2 (cpu, address); return;
        case 7:  ST1_1 (cpu, address); return;
        default:
-         HALT_NYI;
+         HALT_UNALLOC;
        }
     }
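/* Aside (editorial sketch, not part of the patch): with register-31
   post-indexing the base register advances by the total bytes transferred,
   which for the single-structure forms is nregs * esize.  The same
   arithmetic as the writeback switch above, standalone:  */

#include <stdio.h>

static unsigned
single_struct_step (unsigned nregs, unsigned opcode_15_14,
                    unsigned size_bit10)
{
  switch (opcode_15_14)
    {
    case 0: return nregs * 1;                          /* byte lanes */
    case 1: return nregs * 2;                          /* halfword lanes */
    case 2: return size_bit10 ? nregs * 8 : nregs * 4; /* word/dword lanes */
    default: return 0;                                 /* unallocated */
    }
}

int
main (void)
{
  /* ST3 {v0-v2}[n] of halfword lanes: writeback step is 3 * 2 = 6 bytes.  */
  printf ("%u\n", single_struct_step (3, 1, 0));
  return 0;
}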
@@ -11053,36 +12230,27 @@ dexLdSt (sim_cpu *cpu)
 static void
 dexLogicalShiftedRegister (sim_cpu *cpu)
 {
-  /* assert instr[28:24] = 01010
-     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
-     instr[30,29:21] = op,N : 000 ==> AND, 001 ==> BIC,
-                              010 ==> ORR, 011 ==> ORN
-                              100 ==> EOR, 101 ==> EON,
-                              110 ==> ANDS, 111 ==> BICS
+  /* instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
+     instr[30,29] = op
+     instr[28:24] = 01010
      instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
+     instr[21]    = N
+     instr[20,16] = Rm
      instr[15,10] = count : must be 0xxxxx for 32 bit
-     instr[9,5] = Rn
-     instr[4,0] = Rd  */
-
-  /* unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);  */
-  uint32_t dispatch;
-  Shift shiftType;
-  uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
+     instr[9,5]   = Rn
+     instr[4,0]   = Rd  */
 
-  /* 32 bit operations must have count[5] = 0.  */
-  /* or else we have an UNALLOC.  */
-  uint32_t count = uimm (aarch64_get_instr (cpu), 15, 10);
+  uint32_t size = INSTR (31, 31);
+  Shift shiftType = INSTR (23, 22);
+  uint32_t count = INSTR (15, 10);
 
-  if (!size && uimm (count, 5, 5))
+  /* 32 bit operations must have count[5] = 0.
+     or else we have an UNALLOC.  */
+  if (size == 0 && uimm (count, 5, 5))
     HALT_UNALLOC;
 
-  shiftType = shift (aarch64_get_instr (cpu), 22);
-
-  /* dispatch on size:op:N i.e aarch64_get_instr (cpu)[31,29:21].  */
-  dispatch = ( (uimm (aarch64_get_instr (cpu), 31, 29) << 1)
-              | uimm (aarch64_get_instr (cpu), 21, 21));
-
-  switch (dispatch)
+  /* Dispatch on size:op:N.  */
+  switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
     {
     case 0: and32_shift  (cpu, shiftType, count); return;
     case 1: bic32_shift  (cpu, shiftType, count); return;
@@ -11100,7 +12268,6 @@ dexLogicalShiftedRegister (sim_cpu *cpu)
     case 13:eon64_shift  (cpu, shiftType, count); return;
     case 14:ands64_shift (cpu, shiftType, count); return;
     case 15:bics64_shift (cpu, shiftType, count); return;
-    default: HALT_UNALLOC;
     }
 }
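/* Aside (editorial sketch, not part of the patch): packing size:op:N into a
   4-bit index gives the exhaustive 16-way dispatch above; bit 3 selects 32-
   vs 64-bit, bits 2..1 select AND/ORR/EOR/ANDS, and bit 0 (N) inverts the
   second operand, producing BIC/ORN/EON/BICS.  The index computation:  */

#include <stdio.h>

int
main (void)
{
  unsigned size = 1, op = 2, N = 1;      /* 64-bit EON.  */
  unsigned dispatch = (size << 3) | (op << 1) | N;
  printf ("dispatch = %u\n", dispatch);  /* 13, i.e. the eon64_shift case.  */
  return 0;
}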
@@ -11108,9 +12275,9 @@ dexLogicalShiftedRegister (sim_cpu *cpu)
 static void
 csel32 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11122,9 +12289,9 @@ csel32 (sim_cpu *cpu, CondCode cc)
 static void
 csel64 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11136,9 +12303,9 @@ csel64 (sim_cpu *cpu, CondCode cc)
 static void
 csinc32 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11150,9 +12317,9 @@ csinc32 (sim_cpu *cpu, CondCode cc)
 static void
 csinc64 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11164,9 +12331,9 @@ csinc64 (sim_cpu *cpu, CondCode cc)
 static void
 csinv32 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11178,9 +12345,9 @@ csinv32 (sim_cpu *cpu, CondCode cc)
 static void
 csinv64 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11192,9 +12359,9 @@ csinv64 (sim_cpu *cpu, CondCode cc)
 static void
 csneg32 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11206,9 +12373,9 @@ csneg32 (sim_cpu *cpu, CondCode cc)
 static void
 csneg64 (sim_cpu *cpu, CondCode cc)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       testConditionCode (cpu, cc)
@@ -11219,8 +12386,8 @@ csneg64 (sim_cpu *cpu, CondCode cc)
 static void
 dexCondSelect (sim_cpu *cpu)
 {
-  /* assert instr[28,21] = 11011011
-     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
+  /* instr[28,21] = 11011011
+     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
      instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
                            100 ==> CSINV, 101 ==> CSNEG,
                            _1_ ==> UNALLOC
@@ -11228,10 +12395,9 @@ dexCondSelect (sim_cpu *cpu)
      instr[15,12] = cond
      instr[29] = S : 0 ==> ok, 1 ==> UNALLOC  */
 
-  CondCode cc;
-  uint32_t dispatch;
-  uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29);
-  uint32_t op2 = uimm (aarch64_get_instr (cpu), 11, 10);
+  CondCode cc = INSTR (15, 12);
+  uint32_t S = INSTR (29, 29);
+  uint32_t op2 = INSTR (11, 10);
 
   if (S == 1)
     HALT_UNALLOC;
@@ -11239,10 +12405,7 @@ dexCondSelect (sim_cpu *cpu)
   if (op2 & 0x2)
     HALT_UNALLOC;
 
-  cc = condcode (aarch64_get_instr (cpu), 12);
-  dispatch = ((uimm (aarch64_get_instr (cpu), 31, 30) << 1) | op2);
-
-  switch (dispatch)
+  switch ((INSTR (31, 30) << 1) | op2)
     {
     case 0: csel32  (cpu, cc); return;
     case 1: csinc32 (cpu, cc); return;
@@ -11252,7 +12415,6 @@ dexCondSelect (sim_cpu *cpu)
     case 5: csinc64 (cpu, cc); return;
     case 6: csinv64 (cpu, cc); return;
    case 7: csneg64 (cpu, cc); return;
-    default: HALT_UNALLOC;
     }
 }
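/* Aside (editorial sketch, not part of the patch): the four
   conditional-select flavours differ only in what they do with the second
   source when the condition fails: CSEL passes it through, CSINC adds one,
   CSINV inverts, CSNEG negates.  Standalone rendering:  */

#include <stdint.h>
#include <stdio.h>

static uint64_t csel  (int c, uint64_t n, uint64_t m) { return c ? n : m; }
static uint64_t csinc (int c, uint64_t n, uint64_t m) { return c ? n : m + 1; }
static uint64_t csinv (int c, uint64_t n, uint64_t m) { return c ? n : ~m; }
static uint64_t csneg (int c, uint64_t n, uint64_t m) { return c ? n : -m; }

int
main (void)
{
  /* CSET is CSINC with both sources the zero register and cond inverted.  */
  printf ("%llu %llu\n",
          (unsigned long long) csinc (0, 0, 0),   /* 1 */
          (unsigned long long) csneg (0, 7, 5));  /* 2^64 - 5 */
  return 0;
}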
@@ -11353,8 +12515,8 @@ leading64 (uint64_t value)
 static void
 cls32 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* N.B. the result needs to exclude the leading bit.  */
   aarch64_set_reg_u64
@@ -11365,8 +12527,8 @@ cls32 (sim_cpu *cpu)
 static void
 cls64 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* N.B. the result needs to exclude the leading bit.  */
   aarch64_set_reg_u64
@@ -11377,8 +12539,8 @@ cls64 (sim_cpu *cpu)
 static void
 clz32 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
 
   /* if the sign (top) bit is set then the count is 0.  */
@@ -11392,8 +12554,8 @@ clz32 (sim_cpu *cpu)
 static void
 clz64 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
 
   /* if the sign (top) bit is set then the count is 0.  */
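/* Aside (editorial sketch, not part of the patch): CLZ of a value whose top
   bit is set is 0, which is the short-circuit noted in the comments above;
   otherwise the count is how far the highest set bit sits below bit 31.  A
   portable loop version for comparison:  */

#include <stdint.h>
#include <stdio.h>

static int
clz32_loop (uint32_t v)
{
  int n = 0;
  if (v == 0)
    return 32;
  while (!(v & 0x80000000u))
    {
      v <<= 1;
      n++;
    }
  return n;
}

int
main (void)
{
  printf ("%d %d\n", clz32_loop (1), clz32_loop (0x80000000u)); /* 31 0 */
  return 0;
}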
@@ -11407,8 +12569,8 @@ clz64 (sim_cpu *cpu)
 static void
 rbit32 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
   uint32_t result = 0;
   int i;
@@ -11426,8 +12588,8 @@ rbit32 (sim_cpu *cpu)
 static void
 rbit64 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
   uint64_t result = 0;
   int i;
@@ -11435,7 +12597,7 @@ rbit64 (sim_cpu *cpu)
   for (i = 0; i < 64; i++)
     {
       result <<= 1;
-      result |= (value & 1L);
+      result |= (value & 1UL);
       value >>= 1;
     }
   aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
@@ -11445,8 +12607,8 @@ rbit64 (sim_cpu *cpu)
 static void
 rev32 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
   uint32_t result = 0;
   int i;
@@ -11464,8 +12626,8 @@ rev32 (sim_cpu *cpu)
 static void
 rev64 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
   uint64_t result = 0;
   int i;
@@ -11484,8 +12646,8 @@ rev64 (sim_cpu *cpu)
 static void
 revh32 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
   uint32_t result = 0;
   int i;
@@ -11504,8 +12666,8 @@ revh32 (sim_cpu *cpu)
 static void
 revh64 (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
   uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
   uint64_t result = 0;
   int i;
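/* Aside (editorial sketch, not part of the patch): the bit-reverse loop
   consumes the source LSB-first while shifting the result up, so bit i of
   the input becomes bit (width-1-i) of the output.  Standalone version with
   a round-trip self-check:  */

#include <stdint.h>
#include <stdio.h>

static uint64_t
rbit64_loop (uint64_t value)
{
  uint64_t result = 0;
  int i;
  for (i = 0; i < 64; i++)
    {
      result <<= 1;
      result |= value & 1;
      value >>= 1;
    }
  return result;
}

int
main (void)
{
  /* Reversing twice must round-trip.  */
  uint64_t x = 0x0123456789abcdefULL;
  printf ("%d\n", rbit64_loop (rbit64_loop (x)) == x); /* prints 1 */
  return 0;
}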
@@ -11522,22 +12684,22 @@ revh64 (sim_cpu *cpu)
 static void
 dexDataProc1Source (sim_cpu *cpu)
 {
-  /* assert instr[30] == 1
-     aarch64_get_instr (cpu)[28,21] == 111010110
-     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
-     instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
+  /* instr[30]    = 1
+     instr[28,21] = 111010110
+     instr[31]    = size : 0 ==> 32 bit, 1 ==> 64 bit
+     instr[29]    = S : 0 ==> ok, 1 ==> UNALLOC
      instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
      instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
                              000010 ==> REV, 000011 ==> UNALLOC
                              000100 ==> CLZ, 000101 ==> CLS
                              ow ==> UNALLOC
-     instr[9,5] = rn : may not be SP
-     instr[4,0] = rd : may not be SP.  */
+     instr[9,5]   = rn : may not be SP
+     instr[4,0]   = rd : may not be SP.  */
 
-  uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29);
-  uint32_t opcode2 = uimm (aarch64_get_instr (cpu), 20, 16);
-  uint32_t opcode = uimm (aarch64_get_instr (cpu), 15, 10);
-  uint32_t dispatch = ((uimm (aarch64_get_instr (cpu), 31, 31) << 3) | opcode);
+  uint32_t S = INSTR (29, 29);
+  uint32_t opcode2 = INSTR (20, 16);
+  uint32_t opcode = INSTR (15, 10);
+  uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
 
   if (S == 1)
     HALT_UNALLOC;
@@ -11577,9 +12739,9 @@ dexDataProc1Source (sim_cpu *cpu)
 static void
 asrv32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
@@ -11591,9 +12753,9 @@ asrv32 (sim_cpu *cpu)
 static void
 asrv64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
@@ -11605,9 +12767,9 @@ asrv64 (sim_cpu *cpu)
 static void
 lslv32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
@@ -11619,9 +12781,9 @@ lslv32 (sim_cpu *cpu)
 static void
 lslv64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
    (cpu, rd, NO_SP,
@@ -11633,9 +12795,9 @@ lslv64 (sim_cpu *cpu)
 static void
 lsrv32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
@@ -11647,9 +12809,9 @@ lsrv32 (sim_cpu *cpu)
 static void
 lsrv64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
@@ -11661,9 +12823,9 @@ lsrv64 (sim_cpu *cpu)
 static void
 rorv32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
@@ -11675,9 +12837,9 @@ rorv32 (sim_cpu *cpu)
 static void
 rorv64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   aarch64_set_reg_u64
     (cpu, rd, NO_SP,
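/* Aside (editorial sketch, not part of the patch): the variable-shift group
   takes the shift amount from a register modulo the operand width; ROR can
   then be built from two shifts.  A 32-bit rotate-right with the same
   masking:  */

#include <stdint.h>
#include <stdio.h>

static uint32_t
ror32 (uint32_t v, uint32_t amount)
{
  amount &= 31;                 /* shift count is taken mod 32 */
  if (amount == 0)
    return v;
  return (v >> amount) | (v << (32 - amount));
}

int
main (void)
{
  printf ("%08x\n", ror32 (0x000000f0u, 8)); /* f0000000 */
  return 0;
}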
@@ -11692,9 +12854,9 @@ rorv64 (sim_cpu *cpu)
 static void
 cpuiv32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* N.B. the pseudo-code does the divide using 64 bit data.  */
   /* TODO : check that this rounds towards zero as required.  */
   int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
@@ -11708,9 +12870,9 @@ cpuiv32 (sim_cpu *cpu)
 static void
 cpuiv64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* TODO : check that this rounds towards zero as required.  */
   int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
@@ -11724,9 +12886,9 @@ cpuiv64 (sim_cpu *cpu)
 static void
 udiv32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* N.B. the pseudo-code does the divide using 64 bit data.  */
   uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
@@ -11740,9 +12902,9 @@ udiv32 (sim_cpu *cpu)
 static void
 udiv64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* TODO : check that this rounds towards zero as required.  */
   uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
@@ -11765,8 +12927,8 @@ dexDataProc2Source (sim_cpu *cpu)
                              ow ==> UNALLOC.  */
 
   uint32_t dispatch;
-  uint32_t S = uimm (aarch64_get_instr (cpu), 29, 29);
-  uint32_t opcode = uimm (aarch64_get_instr (cpu), 15, 10);
+  uint32_t S = INSTR (29, 29);
+  uint32_t opcode = INSTR (15, 10);
 
   if (S == 1)
     HALT_UNALLOC;
@@ -11774,7 +12936,7 @@ dexDataProc2Source (sim_cpu *cpu)
   if (opcode & 0x34)
     HALT_UNALLOC;
 
-  dispatch = (  (uimm (aarch64_get_instr (cpu), 31, 31) << 3)
+  dispatch = (  (INSTR (31, 31) << 3)
               | (uimm (opcode, 3, 3) << 2)
               |  uimm (opcode, 1, 0));
   switch (dispatch)
@@ -11802,11 +12964,12 @@ dexDataProc2Source (sim_cpu *cpu)
 static void
 madd32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, ra, NO_SP)
                       + aarch64_get_reg_u32 (cpu, rn, NO_SP)
@@ -11817,26 +12980,28 @@ madd32 (sim_cpu *cpu)
 static void
 madd64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, ra, NO_SP)
-                      + aarch64_get_reg_u64 (cpu, rn, NO_SP)
-                      * aarch64_get_reg_u64 (cpu, rm, NO_SP));
+                      + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
+                         * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
 }
 
 /* 32 bit multiply and sub.  */
 static void
 msub32 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u32 (cpu, ra, NO_SP)
                       - aarch64_get_reg_u32 (cpu, rn, NO_SP)
@@ -11847,11 +13012,12 @@ msub32 (sim_cpu *cpu)
 static void
 msub64 (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_reg_u64 (cpu, ra, NO_SP)
                       - aarch64_get_reg_u64 (cpu, rn, NO_SP)
@@ -11862,10 +13028,10 @@ msub64 (sim_cpu *cpu)
 static void
 smaddl (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* N.B. we need to multiply the signed 32 bit values in rn, rm
      to obtain a 64 bit product.  */
@@ -11880,10 +13046,10 @@ smaddl (sim_cpu *cpu)
 static void
 smsubl (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
   /* N.B. we need to multiply the signed 32 bit values in rn, rm
      to obtain a 64 bit product.  */
@@ -11936,7 +13102,6 @@ mul64hi (uint64_t value1, uint64_t value2)
   uint64_t value2_hi = highWordToU64 (value2);
 
   /* Cross-multiply and collect results.  */
-  uint64_t xproductlo = value1_lo * value2_lo;
   uint64_t xproductmid1 = value1_lo * value2_hi;
   uint64_t xproductmid2 = value1_hi * value2_lo;
@@ -11962,6 +13127,8 @@ mul64hi (uint64_t value1, uint64_t value2)
   /* Drop lowest 32 bits of middle cross-product.  */
   result = resultmid1 >> 32;
 
+  /* Move carry bit to just above middle cross-product highest bit.  */
+  carry = carry << 32;
 
   /* Add top cross-product plus and any carry.  */
   result += xproducthi + carry;
@@ -11975,16 +13142,16 @@ static void
 smulh (sim_cpu *cpu)
 {
   uint64_t uresult;
-  int64_t  result;
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
-  GReg     ra = greg (aarch64_get_instr (cpu), 10);
-  int64_t  value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
-  int64_t  value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
+  int64_t  result;
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
+  GReg     ra = INSTR (14, 10);
+  int64_t  value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
+  int64_t  value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
   uint64_t uvalue1;
   uint64_t uvalue2;
-  int64_t  signum = 1;
+  int      negate = 0;
 
   if (ra != R31)
     HALT_UNALLOC;
@@ -11993,7 +13160,7 @@ smulh (sim_cpu *cpu)
      the fix the sign up afterwards.  */
   if (value1 < 0)
     {
-      signum *= -1L;
+      negate = !negate;
      uvalue1 = -value1;
     }
   else
@@ -12003,7 +13170,7 @@ smulh (sim_cpu *cpu)
 
   if (value2 < 0)
     {
-      signum *= -1L;
+      negate = !negate;
       uvalue2 = -value2;
     }
   else
@@ -12011,9 +13178,19 @@ smulh (sim_cpu *cpu)
       uvalue2 = value2;
     }
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+
   uresult = mul64hi (uvalue1, uvalue2);
   result = uresult;
-  result *= signum;
+
+  if (negate)
+    {
+      /* Multiply 128-bit result by -1, which means highpart gets inverted,
+        and has carry in added only if low part is 0.  */
+      result = ~result;
+      if ((uvalue1 * uvalue2) == 0)
+       result += 1;
+    }
 
   aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
 }
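/* Aside (editorial sketch, not part of the patch): the `carry = carry << 32'
   line matters because each overflow of the middle cross-product sum
   represents 2^96 in the full product, i.e. 2^32 in high-word units, not 1.
   The schoolbook high-word multiply with the carry placed correctly,
   checked against native 128-bit arithmetic where available:  */

#include <stdint.h>
#include <stdio.h>

static uint64_t
mulhi_u64 (uint64_t a, uint64_t b)
{
  uint64_t a_lo = a & 0xffffffffULL, a_hi = a >> 32;
  uint64_t b_lo = b & 0xffffffffULL, b_hi = b >> 32;
  uint64_t lo   = a_lo * b_lo;
  uint64_t mid1 = a_lo * b_hi;
  uint64_t mid2 = a_hi * b_lo;
  uint64_t hi   = a_hi * b_hi;
  uint64_t carry = 0;
  uint64_t mid = (lo >> 32) + mid1;

  if (mid < mid1)      /* overflow of the first middle addition */
    carry++;
  mid += mid2;
  if (mid < mid2)      /* overflow of the second middle addition */
    carry++;

  return hi + (carry << 32) + (mid >> 32);
}

int
main (void)
{
  uint64_t a = 0xffffffffffffffffULL, b = 0xfffffffffffffffeULL;
#ifdef __SIZEOF_INT128__
  uint64_t want = (uint64_t) (((__uint128_t) a * b) >> 64);
  printf ("%d\n", mulhi_u64 (a, b) == want);  /* prints 1 */
#else
  printf ("%016llx\n", (unsigned long long) mulhi_u64 (a, b));
#endif
  return 0;
}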
@@ -12023,11 +13200,12 @@ smulh (sim_cpu *cpu)
 static void
 umaddl (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   /* N.B. we need to multiply the signed 32 bit values in rn, rm
      to obtain a 64 bit product.  */
   aarch64_set_reg_u64
@@ -12041,11 +13219,12 @@ umaddl (sim_cpu *cpu)
 static void
 umsubl (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned ra = uimm (aarch64_get_instr (cpu), 14, 10);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rm = INSTR (20, 16);
+  unsigned ra = INSTR (14, 10);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   /* N.B. we need to multiply the signed 32 bit values in rn, rm
      to obtain a 64 bit product.  */
   aarch64_set_reg_u64
@@ -12060,14 +13239,15 @@ umsubl (sim_cpu *cpu)
 static void
 umulh (sim_cpu *cpu)
 {
-  unsigned rm = uimm (aarch64_get_instr (cpu), 20, 16);
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
-  unsigned rd = uimm (aarch64_get_instr (cpu), 4, 0);
-  GReg     ra = greg (aarch64_get_instr (cpu), 10);
+  unsigned rm = INSTR (20, 16);
+  unsigned rn = INSTR (9, 5);
+  unsigned rd = INSTR (4, 0);
+  GReg     ra = INSTR (14, 10);
 
   if (ra != R31)
     HALT_UNALLOC;
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
                                aarch64_get_reg_u64 (cpu, rm, NO_SP)));
@@ -12089,10 +13269,10 @@ dexDataProc3Source (sim_cpu *cpu)
                          ow ==> UNALLOC.  */
 
   uint32_t dispatch;
-  uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
-  uint32_t op54 = uimm (aarch64_get_instr (cpu), 30, 29);
-  uint32_t op31 = uimm (aarch64_get_instr (cpu), 23, 21);
-  uint32_t o0 = uimm (aarch64_get_instr (cpu), 15, 15);
+  uint32_t size = INSTR (31, 31);
+  uint32_t op54 = INSTR (30, 29);
+  uint32_t op31 = INSTR (23, 21);
+  uint32_t o0 = INSTR (15, 15);
 
   if (op54 != 0)
     HALT_UNALLOC;
@@ -12221,6 +13401,7 @@
 static unsigned stack_depth = 0;
 
 static void
 bl (sim_cpu *cpu, int32_t offset)
 {
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_save_LR (cpu);
   aarch64_set_next_PC_by_offset (cpu, offset);
 
@@ -12231,7 +13412,8 @@ bl (sim_cpu *cpu, int32_t offset)
                    " %*scall %" PRIx64 " [%s]"
                    " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_next_PC (cpu),
-                   aarch64_get_func (aarch64_get_next_PC (cpu)),
+                   aarch64_get_func (CPU_STATE (cpu),
+                                     aarch64_get_next_PC (cpu)),
                    aarch64_get_reg_u64 (cpu, 0, NO_SP),
                    aarch64_get_reg_u64 (cpu, 1, NO_SP),
                    aarch64_get_reg_u64 (cpu, 2, NO_SP)
@@ -12246,7 +13428,8 @@
 static void
 br (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+  unsigned rn = INSTR (9, 5);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
 }
@@ -12254,12 +13437,12 @@ br (sim_cpu *cpu)
 static void
 blr (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+  /* Ensure we read the destination before we write LR.  */
+  uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
 
-  /* The pseudo code in the spec says we update LR before fetching.
-     the value from the rn.  */
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_save_LR (cpu);
-  aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
+  aarch64_set_next_PC (cpu, target);
 
   if (TRACE_BRANCH_P (cpu))
     {
@@ -12268,7 +13451,8 @@ blr (sim_cpu *cpu)
                    " %*scall %" PRIx64 " [%s]"
                    " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
                    stack_depth, " ", aarch64_get_next_PC (cpu),
-                   aarch64_get_func (aarch64_get_next_PC (cpu)),
+                   aarch64_get_func (CPU_STATE (cpu),
+                                     aarch64_get_next_PC (cpu)),
                    aarch64_get_reg_u64 (cpu, 0, NO_SP),
                    aarch64_get_reg_u64 (cpu, 1, NO_SP),
                    aarch64_get_reg_u64 (cpu, 2, NO_SP)
@@ -12282,9 +13466,10 @@ blr (sim_cpu *cpu)
 static void
 ret (sim_cpu *cpu)
 {
-  unsigned rn = uimm (aarch64_get_instr (cpu), 9, 5);
+  unsigned rn = INSTR (9, 5);
   aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (TRACE_BRANCH_P (cpu))
     {
       TRACE_BRANCH (cpu,
@@ -12300,6 +13485,7 @@ ret (sim_cpu *cpu)
 static void
 nop (sim_cpu *cpu)
 {
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 }
 
 /* Data synchronization barrier.  */
@@ -12307,6 +13493,7 @@ nop (sim_cpu *cpu)
 static void
 dsb (sim_cpu *cpu)
 {
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 }
 
 /* Data memory barrier.  */
@@ -12314,6 +13501,7 @@ dsb (sim_cpu *cpu)
 static void
 dmb (sim_cpu *cpu)
 {
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 }
 
 /* Instruction synchronization barrier.  */
@@ -12321,6 +13509,7 @@ dmb (sim_cpu *cpu)
 static void
 isb (sim_cpu *cpu)
 {
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 }
 
 static void
@@ -12330,7 +13519,7 @@ dexBranchImmediate (sim_cpu *cpu)
      instr[31] ==> 0 == B, 1 == BL
     instr[25,0] == imm26 branch offset counted in words.  */
 
-  uint32_t top = uimm (aarch64_get_instr (cpu), 31, 31);
+  uint32_t top = INSTR (31, 31);
   /* We have a 26 byte signed word offset which we need to pass to the
      execute routine as a signed byte offset.  */
   int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
@@ -12355,7 +13544,8 @@ dexBranchImmediate (sim_cpu *cpu)
 static void
 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
 {
-  /* the test returns TRUE if CC is met.  */
+  /* The test returns TRUE if CC is met.  */
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (testConditionCode (cpu, cc))
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
@@ -12364,8 +13554,9 @@ bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
 static void
 cbnz32 (sim_cpu *cpu, int32_t offset)
 {
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rt = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
@@ -12374,8 +13565,9 @@ cbnz32 (sim_cpu *cpu, int32_t offset)
 static void
 cbnz (sim_cpu *cpu, int32_t offset)
 {
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rt = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
@@ -12384,8 +13576,9 @@ cbnz (sim_cpu *cpu, int32_t offset)
 static void
 cbz32 (sim_cpu *cpu, int32_t offset)
 {
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rt = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
@@ -12394,8 +13587,9 @@ cbz32 (sim_cpu *cpu, int32_t offset)
 static void
 cbz (sim_cpu *cpu, int32_t offset)
 {
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rt = INSTR (4, 0);
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
@@ -12404,19 +13598,21 @@ cbz (sim_cpu *cpu, int32_t offset)
 static void
 tbnz (sim_cpu *cpu, uint32_t  pos, int32_t offset)
 {
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rt = INSTR (4, 0);
 
-  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos))
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
 
-/* branch on register bit test zero -- one size fits all.  */
+/* Branch on register bit test zero -- one size fits all.  */
 static void
 tbz (sim_cpu *cpu, uint32_t  pos, int32_t offset)
 {
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
+  unsigned rt = INSTR (4, 0);
 
-  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (1 << pos)))
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
     aarch64_set_next_PC_by_offset (cpu, offset);
 }
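/* Aside (editorial sketch, not part of the patch): the ((uint64_t) 1) << pos
   cast above is load-bearing.  A plain `1 << pos' is an int-width shift: it
   is undefined for pos >= 31 and can never reach bits 32..63, so TBZ/TBNZ
   on high bit numbers would test garbage.  Minimal demonstration:  */

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t reg = ((uint64_t) 1) << 40;
  unsigned pos = 40;

  /* The widened mask sees the bit; an int-width mask could not.  */
  printf ("%d\n", (reg & (((uint64_t) 1) << pos)) != 0); /* prints 1 */
  return 0;
}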
@@ -12429,8 +13625,8 @@ dexCompareBranchImmediate (sim_cpu *cpu)
      instr[23,5] = simm19 branch offset counted in words
      instr[4,0]  = rt  */
 
-  uint32_t size = uimm (aarch64_get_instr (cpu), 31, 31);
-  uint32_t op   = uimm (aarch64_get_instr (cpu), 24, 24);
+  uint32_t size = INSTR (31, 31);
+  uint32_t op   = INSTR (24, 24);
   int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
 
   if (size == 0)
@@ -12459,13 +13655,12 @@ dexTestBranchImmediate (sim_cpu *cpu)
      instr[18,5] = simm14 : signed offset counted in words
      instr[4,0]  = uimm5  */
 
-  uint32_t pos = ((uimm (aarch64_get_instr (cpu), 31, 31) << 4)
-                 | uimm (aarch64_get_instr (cpu), 23,19));
+  uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
   int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
 
   NYI_assert (30, 25, 0x1b);
 
-  if (uimm (aarch64_get_instr (cpu), 24, 24) == 0)
+  if (INSTR (24, 24) == 0)
     tbz (cpu, pos, offset);
   else
     tbnz (cpu, pos, offset);
@@ -12481,9 +13676,7 @@ dexCondBranchImmediate (sim_cpu *cpu)
      instr[3,0]  = cond  */
 
   int32_t offset;
-  CondCode cc;
-  uint32_t op = ((uimm (aarch64_get_instr (cpu), 24, 24) << 1)
-                | uimm (aarch64_get_instr (cpu), 4, 4));
+  uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
 
   NYI_assert (31, 25, 0x2a);
 
@@ -12491,9 +13684,8 @@ dexCondBranchImmediate (sim_cpu *cpu)
     HALT_UNALLOC;
 
   offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
-  cc = condcode (aarch64_get_instr (cpu), 0);
 
-  bcc (cpu, offset, cc);
+  bcc (cpu, offset, INSTR (3, 0));
 }
 
 static void
@@ -12505,10 +13697,10 @@ dexBranchRegister (sim_cpu *cpu)
      instr[15,10] = op3 : must be 000000
     instr[4,0]   = op2 : must be 11111.  */
 
-  uint32_t op = uimm (aarch64_get_instr (cpu), 24, 21);
-  uint32_t op2 = uimm (aarch64_get_instr (cpu), 20, 16);
-  uint32_t op3 = uimm (aarch64_get_instr (cpu), 15, 10);
-  uint32_t op4 = uimm (aarch64_get_instr (cpu), 4, 0);
+  uint32_t op = INSTR (24, 21);
+  uint32_t op2 = INSTR (20, 16);
+  uint32_t op3 = INSTR (15, 10);
+  uint32_t op4 = INSTR (4, 0);
 
   NYI_assert (31, 25, 0x6b);
 
@@ -12526,9 +13718,9 @@ dexBranchRegister (sim_cpu *cpu)
 
   else
     {
-      /* ERET and DRPS accept 0b11111 for rn = aarch64_get_instr (cpu)[4,0].  */
+      /* ERET and DRPS accept 0b11111 for rn = instr [4,0].  */
       /* anything else is unallocated.  */
-      uint32_t rn = greg (aarch64_get_instr (cpu), 0);
+      uint32_t rn = INSTR (4, 0);
 
       if (rn != 0x1f)
        HALT_UNALLOC;
@@ -12567,6 +13759,7 @@ handle_halt (sim_cpu *cpu, uint32_t val)
 {
   uint64_t result = 0;
 
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   if (val != 0xf000)
     {
       TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
@@ -12682,9 +13875,6 @@ handle_halt (sim_cpu *cpu, uint32_t val)
          else if (fd == 1)
            {
              printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
-             if (disas)
-               /* So that the output stays in sync with trace output.  */
-               fflush (stdout);
            }
          else if (fd == 2)
            {
@@ -12753,9 +13943,9 @@ dexExcpnGen (sim_cpu *cpu)
      instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
     instr[1,0] = LL : discriminates opc  */
 
-  uint32_t opc = uimm (aarch64_get_instr (cpu), 23, 21);
-  uint32_t imm16 = uimm (aarch64_get_instr (cpu), 20, 5);
-  uint32_t opc2 = uimm (aarch64_get_instr (cpu), 4, 2);
+  uint32_t opc = INSTR (23, 21);
+  uint32_t imm16 = INSTR (20, 5);
+  uint32_t opc2 = INSTR (4, 2);
   uint32_t LL;
 
   NYI_assert (31, 24, 0xd4);
@@ -12763,7 +13953,7 @@ dexExcpnGen (sim_cpu *cpu)
   if (opc2 != 0)
     HALT_UNALLOC;
 
-  LL = uimm (aarch64_get_instr (cpu), 1, 0);
+  LL = INSTR (1, 0);
 
   /* We only implement HLT and BRK for now.  */
   if (opc == 1 && LL == 0)
@@ -12783,9 +13973,7 @@ dexExcpnGen (sim_cpu *cpu)
   HALT_UNALLOC;
 }
 
-/* Stub for accessing system registers.
-   We implement support for the DCZID register since this is used
-   by the C library's memset function.  */
+/* Stub for accessing system registers.  */
 
 static uint64_t
 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
@@ -12794,33 +13982,147 @@ system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
   if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
     /* DCZID_EL0 - the Data Cache Zero ID register.
        We do not support DC ZVA at the moment, so
-       we return a value with the disable bit set.  */
+       we return a value with the disable bit set.
+       We implement support for the DCZID register since
+       it is used by the C library's memset function.  */
    return ((uint64_t) 1) << 4;
 
+  if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
+    /* Cache Type Register.  */
+    return 0x80008000UL;
+
+  if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
+    /* TPIDR_EL0 - thread pointer id.  */
+    return aarch64_get_thread_id (cpu);
+
+  if (op1 == 3 && crm == 4 && op2 == 0)
+    return aarch64_get_FPCR (cpu);
+
+  if (op1 == 3 && crm == 4 && op2 == 1)
+    return aarch64_get_FPSR (cpu);
+
+  else if (op1 == 3 && crm == 2 && op2 == 0)
+    return aarch64_get_CPSR (cpu);
+
   HALT_NYI;
 }
 
+static void
+system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
+           unsigned crm, unsigned op2, uint64_t val)
+{
+  if (op1 == 3 && crm == 4 && op2 == 0)
+    aarch64_set_FPCR (cpu, val);
+
+  else if (op1 == 3 && crm == 4 && op2 == 1)
+    aarch64_set_FPSR (cpu, val);
+
+  else if (op1 == 3 && crm == 2 && op2 == 0)
+    aarch64_set_CPSR (cpu, val);
+
+  else
+    HALT_NYI;
+}
+
 static void
 do_mrs (sim_cpu *cpu)
 {
-  /* instr[31:20] = 1101 01010 0011
+  /* instr[31:20] = 1101 0101 0001 1
     instr[19]    = op0
     instr[18,16] = op1
     instr[15,12] = CRn
     instr[11,8]  = CRm
     instr[7,5]   = op2
     instr[4,0]   = Rt  */
-  unsigned sys_op0 = uimm (aarch64_get_instr (cpu), 19, 19) + 2;
-  unsigned sys_op1 = uimm (aarch64_get_instr (cpu), 18, 16);
-  unsigned sys_crn = uimm (aarch64_get_instr (cpu), 15, 12);
-  unsigned sys_crm = uimm (aarch64_get_instr (cpu), 11, 8);
-  unsigned sys_op2 = uimm (aarch64_get_instr (cpu), 7, 5);
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
-
+  unsigned sys_op0 = INSTR (19, 19) + 2;
+  unsigned sys_op1 = INSTR (18, 16);
+  unsigned sys_crn = INSTR (15, 12);
+  unsigned sys_crm = INSTR (11, 8);
+  unsigned sys_op2 = INSTR (7, 5);
+  unsigned rt = INSTR (4, 0);
+
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
   aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       system_get (cpu, sys_op0, sys_op1, sys_crn,
                                   sys_crm, sys_op2));
 }
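/* Aside (editorial sketch, not part of the patch): MRS/MSR name a system
   register by the tuple (op0,op1,CRn,CRm,op2); the simulator recognizes
   only the handful it models, e.g. DCZID_EL0 above is (3,3,0,0,7) once the
   implicit op0 = bit19 + 2 is applied.  The same tuple match, standalone:  */

#include <stdio.h>

static int
is_dczid_el0 (unsigned op0, unsigned op1, unsigned crn,
              unsigned crm, unsigned op2)
{
  return op0 == 3 && op1 == 3 && crn == 0 && crm == 0 && op2 == 7;
}

int
main (void)
{
  unsigned op0 = 1 + 2;  /* instruction bit 19 set => op0 = 3.  */
  printf ("%d\n", is_dczid_el0 (op0, 3, 0, 0, 7)); /* prints 1 */
  return 0;
}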
+static void
+do_MSR_immediate (sim_cpu *cpu)
+{
+  /* instr[31:19] = 1101 0101 0000 0
+     instr[18,16] = op1
+     instr[15,12] = 0100
+     instr[11,8]  = CRm
+     instr[7,5]   = op2
+     instr[4,0]   = 1 1111  */
+
+  unsigned op1 = INSTR (18, 16);
+  /*unsigned crm = INSTR (11, 8);*/
+  unsigned op2 = INSTR (7, 5);
+
+  NYI_assert (31, 19, 0x1AA0);
+  NYI_assert (15, 12, 0x4);
+  NYI_assert (4, 0, 0x1F);
+
+  if (op1 == 0)
+    {
+      if (op2 == 5)
+       HALT_NYI; /* set SPSel.  */
+      else
+       HALT_UNALLOC;
+    }
+  else if (op1 == 3)
+    {
+      if (op2 == 6)
+       HALT_NYI; /* set DAIFset.  */
+      else if (op2 == 7)
+       HALT_NYI; /* set DAIFclr.  */
+      else
+       HALT_UNALLOC;
+    }
+  else
+    HALT_UNALLOC;
+}
+
+static void
+do_MSR_reg (sim_cpu *cpu)
+{
+  /* instr[31:20] = 1101 0101 0001
+     instr[19]    = op0
+     instr[18,16] = op1
+     instr[15,12] = CRn
+     instr[11,8]  = CRm
+     instr[7,5]   = op2
+     instr[4,0]   = Rt  */
+
+  unsigned sys_op0 = INSTR (19, 19) + 2;
+  unsigned sys_op1 = INSTR (18, 16);
+  unsigned sys_crn = INSTR (15, 12);
+  unsigned sys_crm = INSTR (11, 8);
+  unsigned sys_op2 = INSTR (7, 5);
+  unsigned rt = INSTR (4, 0);
+
+  NYI_assert (31, 20, 0xD51);
+
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+  system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
+             aarch64_get_reg_u64 (cpu, rt, NO_SP));
+}
+
+static void
+do_SYS (sim_cpu *cpu)
+{
+  /* instr[31,19] = 1101 0101 0000 1
+     instr[18,16] = op1
+     instr[15,12] = CRn
+     instr[11,8]  = CRm
+     instr[7,5]   = op2
+     instr[4,0]   = Rt  */
+  NYI_assert (31, 19, 0x1AA1);
+
+  /* FIXME: For now we just silently accept system ops.  */
+}
+
 static void
 dexSystem (sim_cpu *cpu)
 {
@@ -12850,20 +14152,19 @@ dexSystem (sim_cpu *cpu)
             types :  01 ==> Reads, 10 ==> Writes,
                      11 ==> All, 00 ==> All (domain == FullSystem).  */
 
-  unsigned rt = uimm (aarch64_get_instr (cpu), 4, 0);
-  uint32_t l_op0_op1_crn = uimm (aarch64_get_instr (cpu), 21, 12);
+  unsigned rt = INSTR (4, 0);
 
   NYI_assert (31, 22, 0x354);
 
-  switch (l_op0_op1_crn)
+  switch (INSTR (21, 12))
    {
    case 0x032:
      if (rt == 0x1F)
        {
          /* NOP has CRm != 0000 OR.  */
          /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
-         uint32_t crm = uimm (aarch64_get_instr (cpu), 11, 8);
-         uint32_t op2 = uimm (aarch64_get_instr (cpu), 7, 5);
+         uint32_t crm = INSTR (11, 8);
+         uint32_t op2 = INSTR (7, 5);
 
          if (crm != 0 || (op2 == 0 || op2 > 5))
            {
@@ -12876,7 +14177,7 @@ dexSystem (sim_cpu *cpu)
 
    case 0x033:
      {
-       uint32_t op2 =  uimm (aarch64_get_instr (cpu), 7, 5);
+       uint32_t op2 =  INSTR (7, 5);
 
        switch (op2)
          {
@@ -12884,31 +14185,27 @@ dexSystem (sim_cpu *cpu)
          case 4: dsb (cpu); return;
          case 5: dmb (cpu); return;
          case 6: isb (cpu); return;
-         case 7:
          default: HALT_UNALLOC;
         }
      }
 
    case 0x3B0:
-     /* MRS Wt, sys-reg.  */
-     do_mrs (cpu);
-     return;
-
    case 0x3B4:
    case 0x3BD:
-     /* MRS Xt, sys-reg.  */
      do_mrs (cpu);
      return;
 
    case 0x0B7:
-     /* DC <type>, x<n>.  */
-     HALT_NYI;
+     do_SYS (cpu); /* DC is an alias of SYS.  */
      return;
 
    default:
-     /* if (uimm (aarch64_get_instr (cpu), 21, 20) == 0x1)
-         MRS Xt, sys-reg.  */
-     HALT_NYI;
+     if (INSTR (21, 20) == 0x1)
+       do_MSR_reg (cpu);
+     else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
+       do_MSR_immediate (cpu);
+     else
+       HALT_NYI;
      return;
    }
 }
@@ -12928,7 +14225,7 @@ dexBr (sim_cpu *cpu)
     case BR_IMMCMP_001:
       /* Compare has bit 25 clear while test has it set.  */
-      if (!uimm (aarch64_get_instr (cpu), 25, 25))
+      if (!INSTR (25, 25))
        dexCompareBranchImmediate (cpu);
      else
        dexTestBranchImmediate (cpu);
@@ -12937,7 +14234,7 @@ dexBr (sim_cpu *cpu)
     case BR_IMMCOND_010:
       /* This is a conditional branch if bit 25 is clear otherwise
         unallocated.  */
-      if (!uimm (aarch64_get_instr (cpu), 25, 25))
+      if (!INSTR (25, 25))
        dexCondBranchImmediate (cpu);
      else
        HALT_UNALLOC;
@@ -12952,7 +14249,7 @@ dexBr (sim_cpu *cpu)
     case BR_IMMCMP_101:
       /* Compare has bit 25 clear while test has it set.  */
-      if (!uimm (aarch64_get_instr (cpu), 25, 25))
+      if (!INSTR (25, 25))
        dexCompareBranchImmediate (cpu);
      else
        dexTestBranchImmediate (cpu);
@@ -12960,20 +14257,20 @@ dexBr (sim_cpu *cpu)
 
     case BR_REG_110:
       /* Unconditional branch reg has bit 25 set.  */
-      if (uimm (aarch64_get_instr (cpu), 25, 25))
+      if (INSTR (25, 25))
        dexBranchRegister (cpu);
 
       /* This includes both Excpn Gen, System and unalloc operations.
        We need to decode the Excpn Gen operation BRK so we can plant
        debugger entry points.
-        Excpn Gen operations have aarch64_get_instr (cpu)[24] = 0.
+        Excpn Gen operations have instr [24] = 0.
        we need to decode at least one of the System operations NOP
       which is an alias for HINT #0.
-        System operations have aarch64_get_instr (cpu)[24,22] = 100.  */
-      else if (uimm (aarch64_get_instr (cpu), 24, 24) == 0)
+        System operations have instr [24,22] = 100.  */
+      else if (INSTR (24, 24) == 0)
        dexExcpnGen (cpu);
 
-      else if (uimm (aarch64_get_instr (cpu), 24, 22) == 4)
+      else if (INSTR (24, 22) == 4)
        dexSystem (cpu);
 
      else
@@ -13034,21 +14331,15 @@ aarch64_step (sim_cpu *cpu)
     return FALSE;
 
   aarch64_set_next_PC (cpu, pc + 4);
-  aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
 
-  if (TRACE_INSN_P (cpu))
-    {
-      if (disas)
-       TRACE_INSN (cpu, " pc = %" PRIx64 " ", pc);
-      else
-       TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %x", pc,
-                   aarch64_get_instr (cpu));
-    }
-  else if (disas)
-    sim_io_eprintf (CPU_STATE (cpu), " %" PRIx64 " ", pc);
+  /* Code is always little-endian.  */
+  sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
+                       & aarch64_get_instr (cpu), pc, 4);
+  aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
 
-  if (disas)
-    aarch64_print_insn (CPU_STATE (cpu), pc);
+  TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
+             aarch64_get_instr (cpu));
+  TRACE_DISASM (cpu, pc);
 
   aarch64_decode_and_execute (cpu, pc);
 
@@ -13061,10 +14352,15 @@ aarch64_run (SIM_DESC sd)
   sim_cpu *cpu = STATE_CPU (sd, 0);
 
   while (aarch64_step (cpu))
-    aarch64_update_PC (cpu);
+    {
+      aarch64_update_PC (cpu);
+
+      if (sim_events_tick (sd))
+       sim_events_process (sd);
+    }
 
-  sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
-                  sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
+  sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
                  sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
 }
 
 void
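/* Aside (editorial sketch, not part of the patch): the new fetch path reads
   the instruction as raw bytes and byte-swaps on big-endian hosts, so
   instruction decode no longer depends on the data-memory helpers.  A
   host-independent rendering of that little-endian 32-bit assemble step:  */

#include <stdint.h>
#include <stdio.h>

static uint32_t
le32_to_host (const uint8_t *p)
{
  return (uint32_t) p[0]
         | ((uint32_t) p[1] << 8)
         | ((uint32_t) p[2] << 16)
         | ((uint32_t) p[3] << 24);
}

int
main (void)
{
  uint8_t insn_bytes[4] = { 0x1f, 0x20, 0x03, 0xd5 }; /* NOP, little-endian.  */
  printf ("%08x\n", le32_to_host (insn_bytes));       /* d503201f */
  return 0;
}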