/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2017 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        sim_io_eprintf (CPU_STATE (cpu), \
                        "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expand_logical_immediate.  */

/* Return a value with bits [N-1,0] set to 1 and all other bits zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
}

/* Return bit N of VAL as the least significant bit of the result.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value consists of S+1 bits set to 1, rotated left by
     simd_size - R (in other words, rotated right by R), then replicated
     across the register.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits set to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
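  /* N.B. each case below deliberately falls through to the next,
     doubling the pattern until all 64 bits are filled.  For example
     S=0, R=0 with simd_size == 2 gives imm == 1 at this point, which
     replicates to 0x5555555555555555 (alternating ones).  */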
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm;
    case  4: imm = (imm <<  4) | imm;
    case  8: imm = (imm <<  8) | imm;
    case 16: imm = (imm << 16) | imm;
    case 32: imm = (imm << 32) | imm;
    case 64: break;
    default: return 0;
    }

  return imm;
}

/* Instr[22,10] encodes N immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of entries.  */
#define LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
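
/* A sketch of the intended use (hypothetical caller): a logical
   immediate decoder indexes the table with instr[22,10] and treats a
   zero entry as an unallocated encoding:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;  */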

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
                                    aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
                                      aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
                                aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}
/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size in bits:
   16, 32, 64 or 128.  The third argument is either Scaled or
   Unscaled.  N.B. when _Scaling is Scaled the offset is shifted;
   when it is Unscaled it is used as-is.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

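/* For example, a register index of 2 used in a Scaled access to a
   64 bit element becomes a byte displacement of 16, while the
   Unscaled form leaves it as 2:

     OPT_SCALE (2, 64, Scaled)   == 2 << ScaleShift64   (== 16)
     OPT_SCALE (2, 64, Unscaled) == 2

   (This assumes ScaleShift64 == 3, i.e. log2 of the element size in
   bytes, and Unscaled == 0, as relied upon by the ternary above.)  */
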
/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}

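/* For example (the two behaviours side by side):

     extend (0xffffffff, UXTW) == 0x00000000ffffffff
     extend (0xffffffff, SXTW) == 0xffffffffffffffff  (i.e. -1)  */
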
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to scale them according to the size of the
   data being accessed.  The same applies to the register and extended
   register offset versions, except that the latter may also require
   a sign extension.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
}
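
/* N.B. the simulator is single threaded and does not model the
   exclusive monitor: LDXR behaves as a plain load and STXR always
   succeeds, writing 0 (success) as the store status into Rs.  */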

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
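
/* For example, a 64 bit LDR (literal) encodes opc == 01 with V == 0,
   giving dispatch == 2 and hence ldr_pcrel, while the FReg variant
   FLDRD (opc == 01, V == 1) gives dispatch == 3 and fldrd_pcrel.  */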

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

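/* Set the NZCV flags for a 32 bit add by recomputing the sum at 64
   bits: C is set when the unsigned 64 bit sum does not fit back into
   32 bits, V when the signed 64 bit sum does not.  */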
static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

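/* N.B. the NEG and POS macros read a variable named `signbit' from
   the enclosing scope; each of the set_flags_for_* functions below
   defines one of the appropriate width (1U << 31 or 1ULL << 63).  */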
static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

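/* The binop flag setters below only compute N and Z; C and V are
   left clear, matching the architected behaviour of the flag setting
   logical instructions (ANDS, BICS) which set C and V to zero.  */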
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1 << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
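
/* Example (sketch): because subs32/subs64 write Rd with NO_SP, an
   encoding with Rd == 31 discards the result into the zero register;
   that is exactly how the CMP alias reaches these routines:

     CMP X1, #4   ==   SUBS XZR, X1, #4   -- only the flags survive.  */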
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 {
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1880 }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 {
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1904 }
1905 }
1906 }
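
/* Illustrative values for the helpers above (a sketch, assuming a
   shift count in 1..63; a ROR count of 0 would make the bottom-half
   computation shift by the register width, which is undefined in C):

     shifted64 (0x8000000000000001ULL, LSR, 1) => 0x4000000000000000
     shifted64 (0x8000000000000001ULL, ASR, 1) => 0xc000000000000000
     shifted64 (0x8000000000000001ULL, ROR, 1) => 0xc000000000000000  */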
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
1912    N.B. register args may not be SP. */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
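
/* Worked example (sketch): if register 2 holds 0x0000000000000080 then

     extreg64 (cpu, 2, UXTB) => 0x0000000000000080
     extreg64 (cpu, 2, SXTB) => 0xffffffffffffff80

   i.e. the extension decides how the byte/half/word fragment is
   widened before the caller applies its optional left shift.  */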
2080
2081 /* Arithmetic extending register.
2082    These allow an optional sign or zero extension of some portion of
2083    the second source register followed by an optional left shift of
2084    between 0 and 4 bits.
2085
2086    N.B. output (dest) and first input arg (source) may normally be Xn
2087    or SP. However, for flag setting operations dest can only be
2088    Xn. Second input registers are always Xn. */
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221      instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
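
/* Decode sketch for one concrete (hypothetical) encoding: 0x11001020
   is ADD W0, W1, #4 --

     instr[31,29] = 000 => dispatch case 0 (32 bit ADD, no flags)
     instr[23,22] = 00  => no LSL #12 of the immediate
     instr[21,10] = 4   => aimm
     instr[9,5] = 1, instr[4,0] = 0

   so add32 (cpu, 4) writes W1 + 4 to W0 and leaves the flags alone.  */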
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
2305      instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306                              010 ==> UXTW, 011 ==> LSL|UXTX,
2307                              100 ==> SXTB, 101 ==> SXTH,
2308                              110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B. register args may not be SP. */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447   uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465   uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
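
/* SBCS sketch: with C clear the effective operation is Rn - Rm - 1,
   e.g. value1 == 10, value2 == 3, carry == 0 gives result 6 and the
   flags of the subtraction 10 - 4.  */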
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
2480      instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
2508   /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
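
/* Example (sketch): after a compare that leaves two equal operands,
   Z is set and N == V, so

     testConditionCode (cpu, EQ) => 1
     testConditionCode (cpu, GE) => 1
     testConditionCode (cpu, GT) => 0   (Z rules out "greater").  */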
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,16] = element size and index
2629 instr[15,10] = 00 0010 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635 unsigned imm5 = INSTR (20, 16);
2636 unsigned full = INSTR (30, 30);
2637 int size, index;
2638
2639 NYI_assert (29, 21, 0x070);
2640 NYI_assert (15, 10, 0x0B);
2641
2642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2643
2644 if (imm5 & 0x1)
2645 {
2646 size = 0;
2647 index = (imm5 >> 1) & 0xF;
2648 }
2649 else if (imm5 & 0x2)
2650 {
2651 size = 1;
2652 index = (imm5 >> 2) & 0x7;
2653 }
2654 else if (full && (imm5 & 0x4))
2655 {
2656 size = 2;
2657 index = (imm5 >> 3) & 0x3;
2658 }
2659 else
2660 HALT_UNALLOC;
2661
2662 switch (size)
2663 {
2664 case 0:
2665 if (full)
2666 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2667 aarch64_get_vec_s8 (cpu, vs, index));
2668 else
2669 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2670 aarch64_get_vec_s8 (cpu, vs, index));
2671 break;
2672
2673 case 1:
2674 if (full)
2675 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2676 aarch64_get_vec_s16 (cpu, vs, index));
2677 else
2678 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2679 aarch64_get_vec_s16 (cpu, vs, index));
2680 break;
2681
2682 case 2:
2683 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2684 aarch64_get_vec_s32 (cpu, vs, index));
2685 break;
2686
2687 default:
2688 HALT_UNALLOC;
2689 }
2690 }
2691
2692 static void
2693 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2694 {
2695 /* instr[31] = 0
2696 instr[30] = word(0)/long(1)
2697 instr[29,21] = 00 1110 000
2698 instr[20,16] = element size and index
2699 instr[15,10] = 00 0011 11
2700 instr[9,5] = V source
2701 instr[4,0] = R dest */
2702
2703 unsigned vs = INSTR (9, 5);
2704 unsigned rd = INSTR (4, 0);
2705 unsigned imm5 = INSTR (20, 16);
2706 unsigned full = INSTR (30, 30);
2707 int size, index;
2708
2709 NYI_assert (29, 21, 0x070);
2710 NYI_assert (15, 10, 0x0F);
2711
2712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2713
2714 if (!full)
2715 {
2716 if (imm5 & 0x1)
2717 {
2718 size = 0;
2719 index = (imm5 >> 1) & 0xF;
2720 }
2721 else if (imm5 & 0x2)
2722 {
2723 size = 1;
2724 index = (imm5 >> 2) & 0x7;
2725 }
2726 else if (imm5 & 0x4)
2727 {
2728 size = 2;
2729 index = (imm5 >> 3) & 0x3;
2730 }
2731 else
2732 HALT_UNALLOC;
2733 }
2734 else if (imm5 & 0x8)
2735 {
2736 size = 3;
2737 index = (imm5 >> 4) & 0x1;
2738 }
2739 else
2740 HALT_UNALLOC;
2741
2742 switch (size)
2743 {
2744 case 0:
2745 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2746 aarch64_get_vec_u8 (cpu, vs, index));
2747 break;
2748
2749 case 1:
2750 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2751 aarch64_get_vec_u16 (cpu, vs, index));
2752 break;
2753
2754 case 2:
2755 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2756 aarch64_get_vec_u32 (cpu, vs, index));
2757 break;
2758
2759 case 3:
2760 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2761 aarch64_get_vec_u64 (cpu, vs, index));
2762 break;
2763
2764 default:
2765 HALT_UNALLOC;
2766 }
2767 }
2768
2769 static void
2770 do_vec_INS (sim_cpu *cpu)
2771 {
2772 /* instr[31,21] = 01001110000
2773 instr[20,16] = element size and index
2774 instr[15,10] = 000111
2775 instr[9,5] = W source
2776 instr[4,0] = V dest */
2777
2778 int index;
2779 unsigned rs = INSTR (9, 5);
2780 unsigned vd = INSTR (4, 0);
2781
2782 NYI_assert (31, 21, 0x270);
2783 NYI_assert (15, 10, 0x07);
2784
2785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2786 if (INSTR (16, 16))
2787 {
2788 index = INSTR (20, 17);
2789 aarch64_set_vec_u8 (cpu, vd, index,
2790 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2791 }
2792 else if (INSTR (17, 17))
2793 {
2794 index = INSTR (20, 18);
2795 aarch64_set_vec_u16 (cpu, vd, index,
2796 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2797 }
2798 else if (INSTR (18, 18))
2799 {
2800 index = INSTR (20, 19);
2801 aarch64_set_vec_u32 (cpu, vd, index,
2802 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2803 }
2804 else if (INSTR (19, 19))
2805 {
2806 index = INSTR (20, 20);
2807 aarch64_set_vec_u64 (cpu, vd, index,
2808 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2809 }
2810 else
2811 HALT_NYI;
2812 }
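
/* The lowest set bit of imm5 selects the element size and the bits
   above it hold the index, mirroring the architectural encoding.
   Sketch for INS Vd.S[1], Wn: imm5 = 0b01100, so bit 2 selects
   32-bit elements and index = INSTR (20, 19) = 1.  */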
2813
2814 static void
2815 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2816 {
2817 /* instr[31] = 0
2818 instr[30] = half(0)/full(1)
2819 instr[29,21] = 00 1110 000
2820 instr[20,16] = element size and index
2821 instr[15,10] = 0000 01
2822 instr[9,5] = V source
2823 instr[4,0] = V dest. */
2824
2825 unsigned full = INSTR (30, 30);
2826 unsigned vs = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 int i, index;
2829
2830 NYI_assert (29, 21, 0x070);
2831 NYI_assert (15, 10, 0x01);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 if (INSTR (16, 16))
2835 {
2836 index = INSTR (20, 17);
2837
2838 for (i = 0; i < (full ? 16 : 8); i++)
2839 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2840 }
2841 else if (INSTR (17, 17))
2842 {
2843 index = INSTR (20, 18);
2844
2845 for (i = 0; i < (full ? 8 : 4); i++)
2846 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2847 }
2848 else if (INSTR (18, 18))
2849 {
2850 index = INSTR (20, 19);
2851
2852 for (i = 0; i < (full ? 4 : 2); i++)
2853 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2854 }
2855 else
2856 {
2857 if (INSTR (19, 19) == 0)
2858 HALT_UNALLOC;
2859
2860 if (! full)
2861 HALT_UNALLOC;
2862
2863 index = INSTR (20, 20);
2864
2865 for (i = 0; i < 2; i++)
2866 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2867 }
2868 }
2869
2870 static void
2871 do_vec_TBL (sim_cpu *cpu)
2872 {
2873 /* instr[31] = 0
2874 instr[30] = half(0)/full(1)
2875 instr[29,21] = 00 1110 000
2876 instr[20,16] = Vm
2877 instr[15] = 0
2878 instr[14,13] = vec length
2879 instr[12,10] = 000
2880 instr[9,5] = V start
2881 instr[4,0] = V dest */
2882
2883 int full = INSTR (30, 30);
2884 int len = INSTR (14, 13) + 1;
2885 unsigned vm = INSTR (20, 16);
2886 unsigned vn = INSTR (9, 5);
2887 unsigned vd = INSTR (4, 0);
2888 unsigned i;
2889
2890 NYI_assert (29, 21, 0x070);
2891 NYI_assert (12, 10, 0);
2892
2893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2894 for (i = 0; i < (full ? 16 : 8); i++)
2895 {
2896 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2897 uint8_t val;
2898
2899 if (selector < 16)
2900 val = aarch64_get_vec_u8 (cpu, vn, selector);
2901 else if (selector < 32)
2902 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2903 else if (selector < 48)
2904 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2905 else if (selector < 64)
2906 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2907 else
2908 val = 0;
2909
2910 aarch64_set_vec_u8 (cpu, vd, i, val);
2911 }
2912 }
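
/* TBL worked example (sketch): with len == 1 and Vn holding bytes
   { 0x10, 0x11, ..., 0x1f }, a selector byte of 0x03 in Vm produces
   0x13, while any selector >= 16 produces 0x00 -- out-of-range
   indices read as zero rather than faulting.  */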
2913
2914 static void
2915 do_vec_TRN (sim_cpu *cpu)
2916 {
2917 /* instr[31] = 0
2918 instr[30] = half(0)/full(1)
2919 instr[29,24] = 00 1110
2920 instr[23,22] = size
2921 instr[21] = 0
2922 instr[20,16] = Vm
2923 instr[15] = 0
2924 instr[14] = TRN1 (0) / TRN2 (1)
2925 instr[13,10] = 1010
2926 instr[9,5] = V source
2927 instr[4,0] = V dest. */
2928
2929 int full = INSTR (30, 30);
2930 int second = INSTR (14, 14);
2931 unsigned vm = INSTR (20, 16);
2932 unsigned vn = INSTR (9, 5);
2933 unsigned vd = INSTR (4, 0);
2934 unsigned i;
2935
2936 NYI_assert (29, 24, 0x0E);
2937 NYI_assert (13, 10, 0xA);
2938
2939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2940 switch (INSTR (23, 22))
2941 {
2942 case 0:
2943 for (i = 0; i < (full ? 8 : 4); i++)
2944 {
2945 aarch64_set_vec_u8
2946 	    (cpu, vd, i * 2,
2947 	     aarch64_get_vec_u8 (cpu, vn, i * 2 + second));
2948 	  aarch64_set_vec_u8
2949 	    (cpu, vd, i * 2 + 1,
2950 	     aarch64_get_vec_u8 (cpu, vm, i * 2 + second));
2951 }
2952 break;
2953
2954 case 1:
2955 for (i = 0; i < (full ? 4 : 2); i++)
2956 {
2957 aarch64_set_vec_u16
2958 	    (cpu, vd, i * 2,
2959 	     aarch64_get_vec_u16 (cpu, vn, i * 2 + second));
2960 	  aarch64_set_vec_u16
2961 	    (cpu, vd, i * 2 + 1,
2962 	     aarch64_get_vec_u16 (cpu, vm, i * 2 + second));
2963 }
2964 break;
2965
2966 case 2:
2967 aarch64_set_vec_u32
2968 	(cpu, vd, 0, aarch64_get_vec_u32 (cpu, vn, 0 + second));
2969       aarch64_set_vec_u32
2970 	(cpu, vd, 1, aarch64_get_vec_u32 (cpu, vm, 0 + second));
2971       aarch64_set_vec_u32
2972 	(cpu, vd, 2, aarch64_get_vec_u32 (cpu, vn, 2 + second));
2973       aarch64_set_vec_u32
2974 	(cpu, vd, 3, aarch64_get_vec_u32 (cpu, vm, 2 + second));
2975 break;
2976
2977 case 3:
2978 if (! full)
2979 HALT_UNALLOC;
2980
2981       aarch64_set_vec_u64 (cpu, vd, 0,
2982 			   aarch64_get_vec_u64 (cpu, vn, second));
2983       aarch64_set_vec_u64 (cpu, vd, 1,
2984 			   aarch64_get_vec_u64 (cpu, vm, second));
2985 break;
2986 }
2987 }
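
/* TRN worked example (sketch), byte case: TRN1 (second == 0) gives
   Vd = { Vn[0], Vm[0], Vn[2], Vm[2], ... } and TRN2 (second == 1)
   gives Vd = { Vn[1], Vm[1], Vn[3], Vm[3], ... }.  */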
2988
2989 static void
2990 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2991 {
2992 /* instr[31] = 0
2993 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2994 [must be 1 for 64-bit xfer]
2995 instr[29,20] = 00 1110 0000
2996 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2997                            0100=> 32-bits, 1000=> 64-bits
2998 instr[15,10] = 0000 11
2999 instr[9,5] = W source
3000 instr[4,0] = V dest. */
3001
3002 unsigned i;
3003 unsigned Vd = INSTR (4, 0);
3004 unsigned Rs = INSTR (9, 5);
3005 int both = INSTR (30, 30);
3006
3007 NYI_assert (29, 20, 0x0E0);
3008 NYI_assert (15, 10, 0x03);
3009
3010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3011 switch (INSTR (19, 16))
3012 {
3013 case 1:
3014 for (i = 0; i < (both ? 16 : 8); i++)
3015 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3016 break;
3017
3018 case 2:
3019 for (i = 0; i < (both ? 8 : 4); i++)
3020 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3021 break;
3022
3023 case 4:
3024 for (i = 0; i < (both ? 4 : 2); i++)
3025 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3026 break;
3027
3028 case 8:
3029 if (!both)
3030 HALT_NYI;
3031 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3032 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3033 break;
3034
3035 default:
3036 HALT_NYI;
3037 }
3038 }
3039
3040 static void
3041 do_vec_UZP (sim_cpu *cpu)
3042 {
3043 /* instr[31] = 0
3044 instr[30] = half(0)/full(1)
3045 instr[29,24] = 00 1110
3046 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3047 instr[21] = 0
3048 instr[20,16] = Vm
3049 instr[15] = 0
3050 instr[14] = lower (0) / upper (1)
3051 instr[13,10] = 0110
3052 instr[9,5] = Vn
3053 instr[4,0] = Vd. */
3054
3055 int full = INSTR (30, 30);
3056 int upper = INSTR (14, 14);
3057
3058 unsigned vm = INSTR (20, 16);
3059 unsigned vn = INSTR (9, 5);
3060 unsigned vd = INSTR (4, 0);
3061
3062 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3063 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3064 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3065 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3066
3067 uint64_t val1;
3068 uint64_t val2;
3069
3070 uint64_t input2 = full ? val_n2 : val_m1;
3071
3072 NYI_assert (29, 24, 0x0E);
3073 NYI_assert (21, 21, 0);
3074 NYI_assert (15, 15, 0);
3075 NYI_assert (13, 10, 6);
3076
3077 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3078 switch (INSTR (23, 22))
3079 {
3080 case 0:
3081 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3082 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3083 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3084 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3085
3086 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3087 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3088 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3089 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3090
3091 if (full)
3092 {
3093 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3094 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3095 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3096 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3097
3098 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3099 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3100 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3101 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3102 }
3103 break;
3104
3105 case 1:
3106 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3107 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3108
3109       val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3110 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3111
3112 if (full)
3113 {
3114 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3115 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3116
3117 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3118 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3119 }
3120 break;
3121
3122 case 2:
3123 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3124 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3125
3126 if (full)
3127 {
3128 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3129 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3130 }
3131 break;
3132
3133 case 3:
3134 if (! full)
3135 HALT_UNALLOC;
3136
3137 val1 = upper ? val_n2 : val_n1;
3138 val2 = upper ? val_m2 : val_m1;
3139 break;
3140 }
3141
3142 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3143 if (full)
3144 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3145 }
3146
3147 static void
3148 do_vec_ZIP (sim_cpu *cpu)
3149 {
3150 /* instr[31] = 0
3151 instr[30] = half(0)/full(1)
3152 instr[29,24] = 00 1110
3153      instr[23,22] = size: byte(00), half(01), word (10), long (11)
3154 instr[21] = 0
3155 instr[20,16] = Vm
3156 instr[15] = 0
3157 instr[14] = lower (0) / upper (1)
3158 instr[13,10] = 1110
3159 instr[9,5] = Vn
3160 instr[4,0] = Vd. */
3161
3162 int full = INSTR (30, 30);
3163 int upper = INSTR (14, 14);
3164
3165 unsigned vm = INSTR (20, 16);
3166 unsigned vn = INSTR (9, 5);
3167 unsigned vd = INSTR (4, 0);
3168
3169 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3170 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3171 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3172 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3173
3174 uint64_t val1 = 0;
3175 uint64_t val2 = 0;
3176
3177   uint64_t input1 = upper ? val_n2 : val_n1;
3178   uint64_t input2 = upper ? val_m2 : val_m1;
3179
3180 NYI_assert (29, 24, 0x0E);
3181 NYI_assert (21, 21, 0);
3182 NYI_assert (15, 15, 0);
3183 NYI_assert (13, 10, 0xE);
3184
3185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3186   switch (INSTR (23, 22))
3187 {
3188 case 0:
3189 val1 =
3190 ((input1 << 0) & (0xFF << 0))
3191 | ((input2 << 8) & (0xFF << 8))
3192 | ((input1 << 8) & (0xFF << 16))
3193 | ((input2 << 16) & (0xFF << 24))
3194 | ((input1 << 16) & (0xFFULL << 32))
3195 | ((input2 << 24) & (0xFFULL << 40))
3196 | ((input1 << 24) & (0xFFULL << 48))
3197 | ((input2 << 32) & (0xFFULL << 56));
3198
3199 val2 =
3200 ((input1 >> 32) & (0xFF << 0))
3201 | ((input2 >> 24) & (0xFF << 8))
3202 | ((input1 >> 24) & (0xFF << 16))
3203 | ((input2 >> 16) & (0xFF << 24))
3204 | ((input1 >> 16) & (0xFFULL << 32))
3205 | ((input2 >> 8) & (0xFFULL << 40))
3206 | ((input1 >> 8) & (0xFFULL << 48))
3207 | ((input2 >> 0) & (0xFFULL << 56));
3208 break;
3209
3210 case 1:
3211 val1 =
3212 ((input1 << 0) & (0xFFFF << 0))
3213 | ((input2 << 16) & (0xFFFF << 16))
3214 | ((input1 << 16) & (0xFFFFULL << 32))
3215 | ((input2 << 32) & (0xFFFFULL << 48));
3216
3217 val2 =
3218 ((input1 >> 32) & (0xFFFF << 0))
3219 | ((input2 >> 16) & (0xFFFF << 16))
3220 | ((input1 >> 16) & (0xFFFFULL << 32))
3221 | ((input2 >> 0) & (0xFFFFULL << 48));
3222 break;
3223
3224 case 2:
3225 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3226       val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3227 break;
3228
3229 case 3:
3230 val1 = input1;
3231 val2 = input2;
3232 break;
3233 }
3234
3235 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3236 if (full)
3237 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3238 }
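
/* ZIP worked example (sketch), 32-bit full-width case: ZIP1 gives
   Vd = { Vn[0], Vm[0], Vn[1], Vm[1] } and ZIP2 the same pattern over
   elements 2 and 3 of each source.  */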
3239
3240 /* Floating point immediates are encoded in 8 bits.
3241 fpimm[7] = sign bit.
3242 fpimm[6:4] = signed exponent.
3243 fpimm[3:0] = fraction (assuming leading 1).
3244 i.e. F = s * 1.f * 2^(e - b). */
3245
3246 static float
3247 fp_immediate_for_encoding_32 (uint32_t imm8)
3248 {
3249 float u;
3250 uint32_t s, e, f, i;
3251
3252 s = (imm8 >> 7) & 0x1;
3253 e = (imm8 >> 4) & 0x7;
3254 f = imm8 & 0xf;
3255
3256   /* The fp value is (-1)^s * (16 + f)/16 * 2^exp,
3257      where exp = e + 1 for e < 4 and exp = e - 7 otherwise.  */
3257 u = (16.0 + f) / 16.0;
3258
3259 /* N.B. exponent is signed. */
3260 if (e < 4)
3261 {
3262 int epos = e;
3263
3264 for (i = 0; i <= epos; i++)
3265 u *= 2.0;
3266 }
3267 else
3268 {
3269 int eneg = 7 - e;
3270
3271 for (i = 0; i < eneg; i++)
3272 u /= 2.0;
3273 }
3274
3275 if (s)
3276 u = - u;
3277
3278 return u;
3279 }
3280
3281 static double
3282 fp_immediate_for_encoding_64 (uint32_t imm8)
3283 {
3284 double u;
3285 uint32_t s, e, f, i;
3286
3287 s = (imm8 >> 7) & 0x1;
3288 e = (imm8 >> 4) & 0x7;
3289 f = imm8 & 0xf;
3290
3291   /* The fp value is (-1)^s * (16 + f)/16 * 2^exp,
3292      where exp = e + 1 for e < 4 and exp = e - 7 otherwise.  */
3292 u = (16.0 + f) / 16.0;
3293
3294 /* N.B. exponent is signed. */
3295 if (e < 4)
3296 {
3297 int epos = e;
3298
3299 for (i = 0; i <= epos; i++)
3300 u *= 2.0;
3301 }
3302 else
3303 {
3304 int eneg = 7 - e;
3305
3306 for (i = 0; i < eneg; i++)
3307 u /= 2.0;
3308 }
3309
3310 if (s)
3311 u = - u;
3312
3313 return u;
3314 }
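
/* Worked example (sketch): the canonical encoding of 1.0 is
   imm8 == 0x70, i.e. s = 0, e = 7, f = 0:

     u = (16 + 0) / 16 = 1.0 and eneg = 7 - 7 = 0 halvings

   so fp_immediate_for_encoding_64 (0x70) => 1.0, and 0xf0 (sign bit
   set) => -1.0.  */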
3315
3316 static void
3317 do_vec_MOV_immediate (sim_cpu *cpu)
3318 {
3319 /* instr[31] = 0
3320 instr[30] = full/half selector
3321 instr[29,19] = 00111100000
3322 instr[18,16] = high 3 bits of uimm8
3323 instr[15,12] = size & shift:
3324 0000 => 32-bit
3325 0010 => 32-bit + LSL#8
3326 0100 => 32-bit + LSL#16
3327 0110 => 32-bit + LSL#24
3328 1010 => 16-bit + LSL#8
3329 1000 => 16-bit
3330 1101 => 32-bit + MSL#16
3331 1100 => 32-bit + MSL#8
3332 1110 => 8-bit
3333 1111 => double
3334 instr[11,10] = 01
3335 instr[9,5] = low 5-bits of uimm8
3336 instr[4,0] = Vd. */
3337
3338 int full = INSTR (30, 30);
3339 unsigned vd = INSTR (4, 0);
3340 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3341 unsigned i;
3342
3343 NYI_assert (29, 19, 0x1E0);
3344 NYI_assert (11, 10, 1);
3345
3346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3347 switch (INSTR (15, 12))
3348 {
3349 case 0x0: /* 32-bit, no shift. */
3350 case 0x2: /* 32-bit, shift by 8. */
3351 case 0x4: /* 32-bit, shift by 16. */
3352 case 0x6: /* 32-bit, shift by 24. */
3353 val <<= (8 * INSTR (14, 13));
3354 for (i = 0; i < (full ? 4 : 2); i++)
3355 aarch64_set_vec_u32 (cpu, vd, i, val);
3356 break;
3357
3358 case 0xa: /* 16-bit, shift by 8. */
3359 val <<= 8;
3360 /* Fall through. */
3361 case 0x8: /* 16-bit, no shift. */
3362 for (i = 0; i < (full ? 8 : 4); i++)
3363 aarch64_set_vec_u16 (cpu, vd, i, val);
3364 break;
3365
3366 case 0xd: /* 32-bit, mask shift by 16. */
3367 val <<= 8;
3368 val |= 0xFF;
3369 /* Fall through. */
3370 case 0xc: /* 32-bit, mask shift by 8. */
3371 val <<= 8;
3372 val |= 0xFF;
3373 for (i = 0; i < (full ? 4 : 2); i++)
3374 aarch64_set_vec_u32 (cpu, vd, i, val);
3375 break;
3376
3377 case 0xe: /* 8-bit, no shift. */
3378 for (i = 0; i < (full ? 16 : 8); i++)
3379 aarch64_set_vec_u8 (cpu, vd, i, val);
3380 break;
3381
3382 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3383 {
3384 float u = fp_immediate_for_encoding_32 (val);
3385 for (i = 0; i < (full ? 4 : 2); i++)
3386 aarch64_set_vec_float (cpu, vd, i, u);
3387 break;
3388 }
3389
3390 default:
3391 HALT_NYI;
3392 }
3393 }
3394
3395 static void
3396 do_vec_MVNI (sim_cpu *cpu)
3397 {
3398 /* instr[31] = 0
3399 instr[30] = full/half selector
3400 instr[29,19] = 10111100000
3401 instr[18,16] = high 3 bits of uimm8
3402 instr[15,12] = selector
3403 instr[11,10] = 01
3404 instr[9,5] = low 5-bits of uimm8
3405 instr[4,0] = Vd. */
3406
3407 int full = INSTR (30, 30);
3408 unsigned vd = INSTR (4, 0);
3409 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3410 unsigned i;
3411
3412 NYI_assert (29, 19, 0x5E0);
3413 NYI_assert (11, 10, 1);
3414
3415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3416 switch (INSTR (15, 12))
3417 {
3418 case 0x0: /* 32-bit, no shift. */
3419 case 0x2: /* 32-bit, shift by 8. */
3420 case 0x4: /* 32-bit, shift by 16. */
3421 case 0x6: /* 32-bit, shift by 24. */
3422 val <<= (8 * INSTR (14, 13));
3423 val = ~ val;
3424 for (i = 0; i < (full ? 4 : 2); i++)
3425 aarch64_set_vec_u32 (cpu, vd, i, val);
3426 return;
3427
3428 case 0xa: /* 16-bit, 8 bit shift. */
3429 val <<= 8;
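      /* Fall through. */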
3430 case 0x8: /* 16-bit, no shift. */
3431 val = ~ val;
3432 for (i = 0; i < (full ? 8 : 4); i++)
3433 aarch64_set_vec_u16 (cpu, vd, i, val);
3434 return;
3435
3436 case 0xd: /* 32-bit, mask shift by 16. */
3437 val <<= 8;
3438 val |= 0xFF;
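      /* Fall through. */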
3439 case 0xc: /* 32-bit, mask shift by 8. */
3440 val <<= 8;
3441 val |= 0xFF;
3442 val = ~ val;
3443 for (i = 0; i < (full ? 4 : 2); i++)
3444 aarch64_set_vec_u32 (cpu, vd, i, val);
3445 return;
3446
3447     case 0xe: /* MOVI Dn, #mask64 */
3448 {
3449 uint64_t mask = 0;
3450
3451 for (i = 0; i < 8; i++)
3452 if (val & (1 << i))
3453 mask |= (0xFFUL << (i * 8));
3454 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3455 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3456 return;
3457 }
3458
3459 case 0xf: /* FMOV Vd.2D, #fpimm. */
3460 {
3461 double u = fp_immediate_for_encoding_64 (val);
3462
3463 if (! full)
3464 HALT_UNALLOC;
3465
3466 aarch64_set_vec_double (cpu, vd, 0, u);
3467 aarch64_set_vec_double (cpu, vd, 1, u);
3468 return;
3469 }
3470
3471 default:
3472 HALT_NYI;
3473 }
3474 }
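
/* MOVI #mask64 sketch (case 0xe above): each uimm8 bit selects a
   whole byte of ones, so val == 0x0f expands to the 64-bit mask
   0x00000000ffffffff, written to both halves of Vd.  */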
3475
3476 #define ABS(A) ((A) < 0 ? - (A) : (A))
3477
3478 static void
3479 do_vec_ABS (sim_cpu *cpu)
3480 {
3481 /* instr[31] = 0
3482 instr[30] = half(0)/full(1)
3483 instr[29,24] = 00 1110
3484 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3485 instr[21,10] = 10 0000 1011 10
3486 instr[9,5] = Vn
3487      instr[4,0] = Vd. */
3488
3489 unsigned vn = INSTR (9, 5);
3490 unsigned vd = INSTR (4, 0);
3491 unsigned full = INSTR (30, 30);
3492 unsigned i;
3493
3494 NYI_assert (29, 24, 0x0E);
3495 NYI_assert (21, 10, 0x82E);
3496
3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3498 switch (INSTR (23, 22))
3499 {
3500 case 0:
3501 for (i = 0; i < (full ? 16 : 8); i++)
3502 aarch64_set_vec_s8 (cpu, vd, i,
3503 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3504 break;
3505
3506 case 1:
3507 for (i = 0; i < (full ? 8 : 4); i++)
3508 aarch64_set_vec_s16 (cpu, vd, i,
3509 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3510 break;
3511
3512 case 2:
3513 for (i = 0; i < (full ? 4 : 2); i++)
3514 aarch64_set_vec_s32 (cpu, vd, i,
3515 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3516 break;
3517
3518 case 3:
3519 if (! full)
3520 HALT_NYI;
3521 for (i = 0; i < 2; i++)
3522 aarch64_set_vec_s64 (cpu, vd, i,
3523 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3524 break;
3525 }
3526 }
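
/* N.B. ABS here is plain two's-complement negation, so the most
   negative lane value maps to itself (an 8-bit lane holding -128
   stays 0x80); vector ABS does not saturate -- that is SQABS.  */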
3527
3528 static void
3529 do_vec_ADDV (sim_cpu *cpu)
3530 {
3531 /* instr[31] = 0
3532 instr[30] = full/half selector
3533 instr[29,24] = 00 1110
3534 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3535 instr[21,10] = 11 0001 1011 10
3536 instr[9,5] = Vm
3537      instr[4,0] = Rd. */
3538
3539 unsigned vm = INSTR (9, 5);
3540 unsigned rd = INSTR (4, 0);
3541 unsigned i;
3542 int full = INSTR (30, 30);
3543
3544 NYI_assert (29, 24, 0x0E);
3545 NYI_assert (21, 10, 0xC6E);
3546
3547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3548 switch (INSTR (23, 22))
3549 {
3550 case 0:
3551 {
3552 uint8_t val = 0;
3553 for (i = 0; i < (full ? 16 : 8); i++)
3554 val += aarch64_get_vec_u8 (cpu, vm, i);
3555 aarch64_set_vec_u64 (cpu, rd, 0, val);
3556 return;
3557 }
3558
3559 case 1:
3560 {
3561 uint16_t val = 0;
3562 for (i = 0; i < (full ? 8 : 4); i++)
3563 val += aarch64_get_vec_u16 (cpu, vm, i);
3564 aarch64_set_vec_u64 (cpu, rd, 0, val);
3565 return;
3566 }
3567
3568 case 2:
3569 {
3570 uint32_t val = 0;
3571 if (! full)
3572 HALT_UNALLOC;
3573 for (i = 0; i < 4; i++)
3574 val += aarch64_get_vec_u32 (cpu, vm, i);
3575 aarch64_set_vec_u64 (cpu, rd, 0, val);
3576 return;
3577 }
3578
3579 case 3:
3580 HALT_UNALLOC;
3581 }
3582 }
3583
3584 static void
3585 do_vec_ins_2 (sim_cpu *cpu)
3586 {
3587 /* instr[31,21] = 01001110000
3588 instr[20,18] = size & element selector
3589 instr[17,14] = 0000
3590 instr[13] = direction: to vec(0), from vec (1)
3591 instr[12,10] = 111
3592 instr[9,5] = Vm
3593 instr[4,0] = Vd. */
3594
3595 unsigned elem;
3596 unsigned vm = INSTR (9, 5);
3597 unsigned vd = INSTR (4, 0);
3598
3599 NYI_assert (31, 21, 0x270);
3600 NYI_assert (17, 14, 0);
3601 NYI_assert (12, 10, 7);
3602
3603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3604 if (INSTR (13, 13) == 1)
3605 {
3606 if (INSTR (18, 18) == 1)
3607 {
3608 /* 32-bit moves. */
3609 elem = INSTR (20, 19);
3610 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3611 aarch64_get_vec_u32 (cpu, vm, elem));
3612 }
3613 else
3614 {
3615 /* 64-bit moves. */
3616 if (INSTR (19, 19) != 1)
3617 HALT_NYI;
3618
3619 elem = INSTR (20, 20);
3620 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3621 aarch64_get_vec_u64 (cpu, vm, elem));
3622 }
3623 }
3624 else
3625 {
3626 if (INSTR (18, 18) == 1)
3627 {
3628 /* 32-bit moves. */
3629 elem = INSTR (20, 19);
3630 aarch64_set_vec_u32 (cpu, vd, elem,
3631 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3632 }
3633 else
3634 {
3635 /* 64-bit moves. */
3636 if (INSTR (19, 19) != 1)
3637 HALT_NYI;
3638
3639 elem = INSTR (20, 20);
3640 aarch64_set_vec_u64 (cpu, vd, elem,
3641 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3642 }
3643 }
3644 }
3645
3646 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3647 do \
3648 { \
3649 DST_TYPE a[N], b[N]; \
3650 \
3651 for (i = 0; i < (N); i++) \
3652 { \
3653 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3654 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3655 } \
3656 for (i = 0; i < (N); i++) \
3657 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3658 } \
3659 while (0)
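
/* Usage sketch: DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32) stages
   four u16 lanes of vn and vm (offset by `bias' in the *2 upper-half
   forms) into local arrays before any u32 product is written back;
   the staging is what makes vd == vn or vd == vm aliasing safe.  */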
3660
3661 static void
3662 do_vec_mull (sim_cpu *cpu)
3663 {
3664 /* instr[31] = 0
3665 instr[30] = lower(0)/upper(1) selector
3666 instr[29] = signed(0)/unsigned(1)
3667 instr[28,24] = 0 1110
3668 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3669 instr[21] = 1
3670 instr[20,16] = Vm
3671 instr[15,10] = 11 0000
3672 instr[9,5] = Vn
3673      instr[4,0] = Vd. */
3674
3675 int unsign = INSTR (29, 29);
3676 int bias = INSTR (30, 30);
3677 unsigned vm = INSTR (20, 16);
3678 unsigned vn = INSTR ( 9, 5);
3679 unsigned vd = INSTR ( 4, 0);
3680 unsigned i;
3681
3682 NYI_assert (28, 24, 0x0E);
3683 NYI_assert (15, 10, 0x30);
3684
3685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3686 /* NB: Read source values before writing results, in case
3687 the source and destination vectors are the same. */
3688 switch (INSTR (23, 22))
3689 {
3690 case 0:
3691 if (bias)
3692 bias = 8;
3693 if (unsign)
3694 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3695 else
3696 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3697 return;
3698
3699 case 1:
3700 if (bias)
3701 bias = 4;
3702 if (unsign)
3703 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3704 else
3705 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3706 return;
3707
3708 case 2:
3709 if (bias)
3710 bias = 2;
3711 if (unsign)
3712 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3713 else
3714 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3715 return;
3716
3717 case 3:
3718 HALT_NYI;
3719 }
3720 }
3721
3722 static void
3723 do_vec_fadd (sim_cpu *cpu)
3724 {
3725 /* instr[31] = 0
3726 instr[30] = half(0)/full(1)
3727 instr[29,24] = 001110
3728 instr[23] = FADD(0)/FSUB(1)
3729 instr[22] = float (0)/double(1)
3730 instr[21] = 1
3731 instr[20,16] = Vm
3732 instr[15,10] = 110101
3733 instr[9,5] = Vn
3734      instr[4,0] = Vd. */
3735
3736 unsigned vm = INSTR (20, 16);
3737 unsigned vn = INSTR (9, 5);
3738 unsigned vd = INSTR (4, 0);
3739 unsigned i;
3740 int full = INSTR (30, 30);
3741
3742 NYI_assert (29, 24, 0x0E);
3743 NYI_assert (21, 21, 1);
3744 NYI_assert (15, 10, 0x35);
3745
3746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3747 if (INSTR (23, 23))
3748 {
3749 if (INSTR (22, 22))
3750 {
3751 if (! full)
3752 HALT_NYI;
3753
3754 for (i = 0; i < 2; i++)
3755 aarch64_set_vec_double (cpu, vd, i,
3756 aarch64_get_vec_double (cpu, vn, i)
3757 - aarch64_get_vec_double (cpu, vm, i));
3758 }
3759 else
3760 {
3761 for (i = 0; i < (full ? 4 : 2); i++)
3762 aarch64_set_vec_float (cpu, vd, i,
3763 aarch64_get_vec_float (cpu, vn, i)
3764 - aarch64_get_vec_float (cpu, vm, i));
3765 }
3766 }
3767 else
3768 {
3769 if (INSTR (22, 22))
3770 {
3771 if (! full)
3772 HALT_NYI;
3773
3774 for (i = 0; i < 2; i++)
3775 aarch64_set_vec_double (cpu, vd, i,
3776 aarch64_get_vec_double (cpu, vm, i)
3777 + aarch64_get_vec_double (cpu, vn, i));
3778 }
3779 else
3780 {
3781 for (i = 0; i < (full ? 4 : 2); i++)
3782 aarch64_set_vec_float (cpu, vd, i,
3783 aarch64_get_vec_float (cpu, vm, i)
3784 + aarch64_get_vec_float (cpu, vn, i));
3785 }
3786 }
3787 }
3788
3789 static void
3790 do_vec_add (sim_cpu *cpu)
3791 {
3792 /* instr[31] = 0
3793 instr[30] = full/half selector
3794 instr[29,24] = 001110
3795 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3796 instr[21] = 1
3797      instr[20,16] = Vm
3798      instr[15,10] = 100001
3799      instr[9,5] = Vn
3800      instr[4,0] = Vd. */
3801
3802 unsigned vm = INSTR (20, 16);
3803 unsigned vn = INSTR (9, 5);
3804 unsigned vd = INSTR (4, 0);
3805 unsigned i;
3806 int full = INSTR (30, 30);
3807
3808 NYI_assert (29, 24, 0x0E);
3809 NYI_assert (21, 21, 1);
3810 NYI_assert (15, 10, 0x21);
3811
3812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3813 switch (INSTR (23, 22))
3814 {
3815 case 0:
3816 for (i = 0; i < (full ? 16 : 8); i++)
3817 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3818 + aarch64_get_vec_u8 (cpu, vm, i));
3819 return;
3820
3821 case 1:
3822 for (i = 0; i < (full ? 8 : 4); i++)
3823 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3824 + aarch64_get_vec_u16 (cpu, vm, i));
3825 return;
3826
3827 case 2:
3828 for (i = 0; i < (full ? 4 : 2); i++)
3829 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3830 + aarch64_get_vec_u32 (cpu, vm, i));
3831 return;
3832
3833 case 3:
3834 if (! full)
3835 HALT_UNALLOC;
3836 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3837 + aarch64_get_vec_u64 (cpu, vm, 0));
3838 aarch64_set_vec_u64 (cpu, vd, 1,
3839 aarch64_get_vec_u64 (cpu, vn, 1)
3840 + aarch64_get_vec_u64 (cpu, vm, 1));
3841 return;
3842 }
3843 }
3844
3845 static void
3846 do_vec_mul (sim_cpu *cpu)
3847 {
3848 /* instr[31] = 0
3849 instr[30] = full/half selector
3850 instr[29,24] = 00 1110
3851 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3852 instr[21] = 1
3853      instr[20,16] = Vm
3854      instr[15,10] = 10 0111
3855      instr[9,5] = Vn
3856      instr[4,0] = Vd. */
3857
3858 unsigned vm = INSTR (20, 16);
3859 unsigned vn = INSTR (9, 5);
3860 unsigned vd = INSTR (4, 0);
3861 unsigned i;
3862 int full = INSTR (30, 30);
3863 int bias = 0;
3864
3865 NYI_assert (29, 24, 0x0E);
3866 NYI_assert (21, 21, 1);
3867 NYI_assert (15, 10, 0x27);
3868
3869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3870 switch (INSTR (23, 22))
3871 {
3872 case 0:
3873 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3874 return;
3875
3876 case 1:
3877 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3878 return;
3879
3880 case 2:
3881 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3882 return;
3883
3884 case 3:
3885 HALT_UNALLOC;
3886 }
3887 }
3888
3889 static void
3890 do_vec_MLA (sim_cpu *cpu)
3891 {
3892 /* instr[31] = 0
3893 instr[30] = full/half selector
3894 instr[29,24] = 00 1110
3895 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3896 instr[21] = 1
3897      instr[20,16] = Vm
3898      instr[15,10] = 1001 01
3899      instr[9,5] = Vn
3900      instr[4,0] = Vd. */
3901
3902 unsigned vm = INSTR (20, 16);
3903 unsigned vn = INSTR (9, 5);
3904 unsigned vd = INSTR (4, 0);
3905 unsigned i;
3906 int full = INSTR (30, 30);
3907
3908 NYI_assert (29, 24, 0x0E);
3909 NYI_assert (21, 21, 1);
3910 NYI_assert (15, 10, 0x25);
3911
3912 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3913 switch (INSTR (23, 22))
3914 {
3915 case 0:
3916 for (i = 0; i < (full ? 16 : 8); i++)
3917 aarch64_set_vec_u8 (cpu, vd, i,
3918 aarch64_get_vec_u8 (cpu, vd, i)
3919 + (aarch64_get_vec_u8 (cpu, vn, i)
3920 * aarch64_get_vec_u8 (cpu, vm, i)));
3921 return;
3922
3923 case 1:
3924 for (i = 0; i < (full ? 8 : 4); i++)
3925 aarch64_set_vec_u16 (cpu, vd, i,
3926 aarch64_get_vec_u16 (cpu, vd, i)
3927 + (aarch64_get_vec_u16 (cpu, vn, i)
3928 * aarch64_get_vec_u16 (cpu, vm, i)));
3929 return;
3930
3931 case 2:
3932 for (i = 0; i < (full ? 4 : 2); i++)
3933 aarch64_set_vec_u32 (cpu, vd, i,
3934 aarch64_get_vec_u32 (cpu, vd, i)
3935 + (aarch64_get_vec_u32 (cpu, vn, i)
3936 * aarch64_get_vec_u32 (cpu, vm, i)));
3937 return;
3938
3939 default:
3940 HALT_UNALLOC;
3941 }
3942 }
3943
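/* The next four helpers implement the IEEE 754-2008 maxNum/minNum
   semantics used by the FMAXNM/FMINNM family of instructions: a NaN
   operand is treated as missing data, so the numeric operand wins,
   and a NaN is returned only when both operands are NaNs.  For example:

     fmaxnm (NAN, 1.0f) == 1.0f
     fminnm (2.0f, NAN) == 2.0f
     fmaxnm (NAN, NAN)  is NaN.  */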
3944 static float
3945 fmaxnm (float a, float b)
3946 {
3947 if (! isnan (a))
3948 {
3949 if (! isnan (b))
3950 return a > b ? a : b;
3951 return a;
3952 }
3953 else if (! isnan (b))
3954 return b;
3955 return a;
3956 }
3957
3958 static float
3959 fminnm (float a, float b)
3960 {
3961 if (! isnan (a))
3962 {
3963 if (! isnan (b))
3964 return a < b ? a : b;
3965 return a;
3966 }
3967 else if (! isnan (b))
3968 return b;
3969 return a;
3970 }
3971
3972 static double
3973 dmaxnm (double a, double b)
3974 {
3975 if (! isnan (a))
3976 {
3977 if (! isnan (b))
3978 return a > b ? a : b;
3979 return a;
3980 }
3981 else if (! isnan (b))
3982 return b;
3983 return a;
3984 }
3985
3986 static double
3987 dminnm (double a, double b)
3988 {
3989 if (! isnan (a))
3990 {
3991 if (! isnan (b))
3992 return a < b ? a : b;
3993 return a;
3994 }
3995 else if (! isnan (b))
3996 return b;
3997 return a;
3998 }
3999
4000 static void
4001 do_vec_FminmaxNMP (sim_cpu *cpu)
4002 {
4003 /* instr [31] = 0
4004 instr [30] = half (0)/full (1)
4005 instr [29,24] = 10 1110
4006 instr [23] = max(0)/min(1)
4007 instr [22] = float (0)/double (1)
4008 instr [21] = 1
4009     instr [20,16] = Vm
4010     instr [15,10] = 1100 01
4011     instr [9,5]  = Vn
4012     instr [4,0]  = Vd. */
4013
4014 unsigned vm = INSTR (20, 16);
4015 unsigned vn = INSTR (9, 5);
4016 unsigned vd = INSTR (4, 0);
4017 int full = INSTR (30, 30);
4018
4019 NYI_assert (29, 24, 0x2E);
4020 NYI_assert (21, 21, 1);
4021 NYI_assert (15, 10, 0x31);
4022
4023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4024 if (INSTR (22, 22))
4025 {
4026 double (* fn)(double, double) = INSTR (23, 23)
4027 ? dminnm : dmaxnm;
4028
4029 if (! full)
4030 HALT_NYI;
4031 aarch64_set_vec_double (cpu, vd, 0,
4032 fn (aarch64_get_vec_double (cpu, vn, 0),
4033 aarch64_get_vec_double (cpu, vn, 1)));
4034       aarch64_set_vec_double (cpu, vd, 1,
4035 			      fn (aarch64_get_vec_double (cpu, vm, 0),
4036 				  aarch64_get_vec_double (cpu, vm, 1)));
4037 }
4038 else
4039 {
4040 float (* fn)(float, float) = INSTR (23, 23)
4041 ? fminnm : fmaxnm;
4042
4043 aarch64_set_vec_float (cpu, vd, 0,
4044 fn (aarch64_get_vec_float (cpu, vn, 0),
4045 aarch64_get_vec_float (cpu, vn, 1)));
4046 if (full)
4047 aarch64_set_vec_float (cpu, vd, 1,
4048 fn (aarch64_get_vec_float (cpu, vn, 2),
4049 aarch64_get_vec_float (cpu, vn, 3)));
4050
4051 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4052 fn (aarch64_get_vec_float (cpu, vm, 0),
4053 aarch64_get_vec_float (cpu, vm, 1)));
4054 if (full)
4055 aarch64_set_vec_float (cpu, vd, 3,
4056 fn (aarch64_get_vec_float (cpu, vm, 2),
4057 aarch64_get_vec_float (cpu, vm, 3)));
4058 }
4059 }
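
/* Note the pairwise layout used above: the lower half of Vd holds the
   min/max of adjacent pairs taken from Vn, and the upper half holds the
   min/max of adjacent pairs taken from Vm.  */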
4060
4061 static void
4062 do_vec_AND (sim_cpu *cpu)
4063 {
4064 /* instr[31] = 0
4065 instr[30] = half (0)/full (1)
4066 instr[29,21] = 001110001
4067 instr[20,16] = Vm
4068 instr[15,10] = 000111
4069 instr[9,5] = Vn
4070 instr[4,0] = Vd. */
4071
4072 unsigned vm = INSTR (20, 16);
4073 unsigned vn = INSTR (9, 5);
4074 unsigned vd = INSTR (4, 0);
4075 unsigned i;
4076 int full = INSTR (30, 30);
4077
4078 NYI_assert (29, 21, 0x071);
4079 NYI_assert (15, 10, 0x07);
4080
4081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4082 for (i = 0; i < (full ? 4 : 2); i++)
4083 aarch64_set_vec_u32 (cpu, vd, i,
4084 aarch64_get_vec_u32 (cpu, vn, i)
4085 & aarch64_get_vec_u32 (cpu, vm, i));
4086 }
4087
4088 static void
4089 do_vec_BSL (sim_cpu *cpu)
4090 {
4091 /* instr[31] = 0
4092 instr[30] = half (0)/full (1)
4093 instr[29,21] = 101110011
4094 instr[20,16] = Vm
4095 instr[15,10] = 000111
4096 instr[9,5] = Vn
4097 instr[4,0] = Vd. */
4098
4099 unsigned vm = INSTR (20, 16);
4100 unsigned vn = INSTR (9, 5);
4101 unsigned vd = INSTR (4, 0);
4102 unsigned i;
4103 int full = INSTR (30, 30);
4104
4105 NYI_assert (29, 21, 0x173);
4106 NYI_assert (15, 10, 0x07);
4107
4108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4109 for (i = 0; i < (full ? 16 : 8); i++)
4110 aarch64_set_vec_u8 (cpu, vd, i,
4111 ( aarch64_get_vec_u8 (cpu, vd, i)
4112 & aarch64_get_vec_u8 (cpu, vn, i))
4113 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4114 & aarch64_get_vec_u8 (cpu, vm, i)));
4115 }
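
/* Worked example of the selection above: with vd = 0xF0, vn = 0xAA and
   vm = 0x55, each result byte is (0xF0 & 0xAA) | (~0xF0 & 0x55)
   = 0xA0 | 0x05 = 0xA5; set bits of Vd select Vn, clear bits select Vm.  */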
4116
4117 static void
4118 do_vec_EOR (sim_cpu *cpu)
4119 {
4120 /* instr[31] = 0
4121 instr[30] = half (0)/full (1)
4122 instr[29,21] = 10 1110 001
4123 instr[20,16] = Vm
4124 instr[15,10] = 000111
4125 instr[9,5] = Vn
4126 instr[4,0] = Vd. */
4127
4128 unsigned vm = INSTR (20, 16);
4129 unsigned vn = INSTR (9, 5);
4130 unsigned vd = INSTR (4, 0);
4131 unsigned i;
4132 int full = INSTR (30, 30);
4133
4134 NYI_assert (29, 21, 0x171);
4135 NYI_assert (15, 10, 0x07);
4136
4137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4138 for (i = 0; i < (full ? 4 : 2); i++)
4139 aarch64_set_vec_u32 (cpu, vd, i,
4140 aarch64_get_vec_u32 (cpu, vn, i)
4141 ^ aarch64_get_vec_u32 (cpu, vm, i));
4142 }
4143
4144 static void
4145 do_vec_bit (sim_cpu *cpu)
4146 {
4147 /* instr[31] = 0
4148 instr[30] = half (0)/full (1)
4149 instr[29,23] = 10 1110 1
4150 instr[22] = BIT (0) / BIF (1)
4151 instr[21] = 1
4152 instr[20,16] = Vm
4153 instr[15,10] = 0001 11
4154 instr[9,5] = Vn
4155 instr[4,0] = Vd. */
4156
4157 unsigned vm = INSTR (20, 16);
4158 unsigned vn = INSTR (9, 5);
4159 unsigned vd = INSTR (4, 0);
4160 unsigned full = INSTR (30, 30);
4161 unsigned test_false = INSTR (22, 22);
4162 unsigned i;
4163
4164 NYI_assert (29, 23, 0x5D);
4165 NYI_assert (21, 21, 1);
4166 NYI_assert (15, 10, 0x07);
4167
4168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4169 for (i = 0; i < (full ? 4 : 2); i++)
4170 {
4171 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4172 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4173 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4174 if (test_false)
4175 aarch64_set_vec_u32 (cpu, vd, i,
4176 (vd_val & vm_val) | (vn_val & ~vm_val));
4177 else
4178 aarch64_set_vec_u32 (cpu, vd, i,
4179 (vd_val & ~vm_val) | (vn_val & vm_val));
4180 }
4181 }
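
/* BIT inserts each Vn bit into Vd where the corresponding Vm bit is 1;
   BIF inserts where the Vm bit is 0.  In both cases the remaining Vd
   bits are left unchanged, which is why Vd is read as well as written.  */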
4182
4183 static void
4184 do_vec_ORN (sim_cpu *cpu)
4185 {
4186 /* instr[31] = 0
4187 instr[30] = half (0)/full (1)
4188 instr[29,21] = 00 1110 111
4189 instr[20,16] = Vm
4190 instr[15,10] = 00 0111
4191 instr[9,5] = Vn
4192 instr[4,0] = Vd. */
4193
4194 unsigned vm = INSTR (20, 16);
4195 unsigned vn = INSTR (9, 5);
4196 unsigned vd = INSTR (4, 0);
4197 unsigned i;
4198 int full = INSTR (30, 30);
4199
4200 NYI_assert (29, 21, 0x077);
4201 NYI_assert (15, 10, 0x07);
4202
4203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4204 for (i = 0; i < (full ? 16 : 8); i++)
4205 aarch64_set_vec_u8 (cpu, vd, i,
4206 aarch64_get_vec_u8 (cpu, vn, i)
4207 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4208 }
4209
4210 static void
4211 do_vec_ORR (sim_cpu *cpu)
4212 {
4213 /* instr[31] = 0
4214 instr[30] = half (0)/full (1)
4215 instr[29,21] = 00 1110 101
4216 instr[20,16] = Vm
4217 instr[15,10] = 0001 11
4218 instr[9,5] = Vn
4219 instr[4,0] = Vd. */
4220
4221 unsigned vm = INSTR (20, 16);
4222 unsigned vn = INSTR (9, 5);
4223 unsigned vd = INSTR (4, 0);
4224 unsigned i;
4225 int full = INSTR (30, 30);
4226
4227 NYI_assert (29, 21, 0x075);
4228 NYI_assert (15, 10, 0x07);
4229
4230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4231 for (i = 0; i < (full ? 16 : 8); i++)
4232 aarch64_set_vec_u8 (cpu, vd, i,
4233 aarch64_get_vec_u8 (cpu, vn, i)
4234 | aarch64_get_vec_u8 (cpu, vm, i));
4235 }
4236
4237 static void
4238 do_vec_BIC (sim_cpu *cpu)
4239 {
4240 /* instr[31] = 0
4241 instr[30] = half (0)/full (1)
4242 instr[29,21] = 00 1110 011
4243 instr[20,16] = Vm
4244 instr[15,10] = 00 0111
4245 instr[9,5] = Vn
4246 instr[4,0] = Vd. */
4247
4248 unsigned vm = INSTR (20, 16);
4249 unsigned vn = INSTR (9, 5);
4250 unsigned vd = INSTR (4, 0);
4251 unsigned i;
4252 int full = INSTR (30, 30);
4253
4254 NYI_assert (29, 21, 0x073);
4255 NYI_assert (15, 10, 0x07);
4256
4257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4258 for (i = 0; i < (full ? 16 : 8); i++)
4259 aarch64_set_vec_u8 (cpu, vd, i,
4260 aarch64_get_vec_u8 (cpu, vn, i)
4261 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4262 }
4263
4264 static void
4265 do_vec_XTN (sim_cpu *cpu)
4266 {
4267 /* instr[31] = 0
4268 instr[30] = first part (0)/ second part (1)
4269 instr[29,24] = 00 1110
4270 instr[23,22] = size: byte(00), half(01), word (10)
4271 instr[21,10] = 1000 0100 1010
4272 instr[9,5] = Vs
4273 instr[4,0] = Vd. */
4274
4275 unsigned vs = INSTR (9, 5);
4276 unsigned vd = INSTR (4, 0);
4277 unsigned bias = INSTR (30, 30);
4278 unsigned i;
4279
4280 NYI_assert (29, 24, 0x0E);
4281 NYI_assert (21, 10, 0x84A);
4282
4283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4284 switch (INSTR (23, 22))
4285 {
4286 case 0:
4287 for (i = 0; i < 8; i++)
4288 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4289 aarch64_get_vec_u16 (cpu, vs, i));
4290 return;
4291
4292 case 1:
4293 for (i = 0; i < 4; i++)
4294 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4295 aarch64_get_vec_u32 (cpu, vs, i));
4296 return;
4297
4298 case 2:
4299 for (i = 0; i < 2; i++)
4300 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4301 aarch64_get_vec_u64 (cpu, vs, i));
4302 return;
4303 }
4304 }
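
/* XTN narrows each wide source lane to its low half; with bit 30 set
   (XTN2) the narrowed results are written to the upper half of Vd,
   which is what the bias offset above implements.  */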
4305
4306 /* Return the number of bits set in the input value. */
4307 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4308 # define popcount __builtin_popcount
4309 #else
4310 static int
4311 popcount (unsigned char x)
4312 {
4313 static const unsigned char popcnt[16] =
4314 {
4315 0, 1, 1, 2,
4316 1, 2, 2, 3,
4317 1, 2, 2, 3,
4318 2, 3, 3, 4
4319 };
4320
4321 /* Only counts the low 8 bits of the input as that is all we need. */
4322 return popcnt[x % 16] + popcnt[x / 16];
4323 }
4324 #endif
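
/* Worked example: popcount (0xB6) is 5; the table fallback computes it
   as popcnt[0x6] + popcnt[0xB] = 2 + 3.  */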
4325
4326 static void
4327 do_vec_CNT (sim_cpu *cpu)
4328 {
4329 /* instr[31] = 0
4330 instr[30] = half (0)/ full (1)
4331 instr[29,24] = 00 1110
4332 instr[23,22] = size: byte(00)
4333 instr[21,10] = 1000 0001 0110
4334 instr[9,5] = Vs
4335 instr[4,0] = Vd. */
4336
4337 unsigned vs = INSTR (9, 5);
4338 unsigned vd = INSTR (4, 0);
4339 int full = INSTR (30, 30);
4340 int size = INSTR (23, 22);
4341 int i;
4342
4343 NYI_assert (29, 24, 0x0E);
4344 NYI_assert (21, 10, 0x816);
4345
4346 if (size != 0)
4347 HALT_UNALLOC;
4348
4349 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4350
4351 for (i = 0; i < (full ? 16 : 8); i++)
4352 aarch64_set_vec_u8 (cpu, vd, i,
4353 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4354 }
4355
4356 static void
4357 do_vec_maxv (sim_cpu *cpu)
4358 {
4359 /* instr[31] = 0
4360 instr[30] = half(0)/full(1)
4361 instr[29] = signed (0)/unsigned(1)
4362 instr[28,24] = 0 1110
4363 instr[23,22] = size: byte(00), half(01), word (10)
4364 instr[21] = 1
4365 instr[20,17] = 1 000
4366 instr[16] = max(0)/min(1)
4367 instr[15,10] = 1010 10
4368 instr[9,5] = V source
4369 instr[4,0] = R dest. */
4370
4371 unsigned vs = INSTR (9, 5);
4372 unsigned rd = INSTR (4, 0);
4373 unsigned full = INSTR (30, 30);
4374 unsigned i;
4375
4376 NYI_assert (28, 24, 0x0E);
4377 NYI_assert (21, 21, 1);
4378 NYI_assert (20, 17, 8);
4379 NYI_assert (15, 10, 0x2A);
4380
4381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4382 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4383 {
4384 case 0: /* SMAXV. */
4385 {
4386 int64_t smax;
4387 switch (INSTR (23, 22))
4388 {
4389 case 0:
4390 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4391 for (i = 1; i < (full ? 16 : 8); i++)
4392 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4393 break;
4394 case 1:
4395 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4396 for (i = 1; i < (full ? 8 : 4); i++)
4397 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4398 break;
4399 case 2:
4400 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4401 for (i = 1; i < (full ? 4 : 2); i++)
4402 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4403 break;
4404 case 3:
4405 HALT_UNALLOC;
4406 }
4407 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4408 return;
4409 }
4410
4411 case 1: /* SMINV. */
4412 {
4413 int64_t smin;
4414 switch (INSTR (23, 22))
4415 {
4416 case 0:
4417 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4418 for (i = 1; i < (full ? 16 : 8); i++)
4419 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4420 break;
4421 case 1:
4422 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4423 for (i = 1; i < (full ? 8 : 4); i++)
4424 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4425 break;
4426 case 2:
4427 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4428 for (i = 1; i < (full ? 4 : 2); i++)
4429 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4430 break;
4431
4432 case 3:
4433 HALT_UNALLOC;
4434 }
4435 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4436 return;
4437 }
4438
4439 case 2: /* UMAXV. */
4440 {
4441 uint64_t umax;
4442 switch (INSTR (23, 22))
4443 {
4444 case 0:
4445 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4446 for (i = 1; i < (full ? 16 : 8); i++)
4447 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4448 break;
4449 case 1:
4450 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4451 for (i = 1; i < (full ? 8 : 4); i++)
4452 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4453 break;
4454 case 2:
4455 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4456 for (i = 1; i < (full ? 4 : 2); i++)
4457 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4458 break;
4459
4460 case 3:
4461 HALT_UNALLOC;
4462 }
4463 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4464 return;
4465 }
4466
4467 case 3: /* UMINV. */
4468 {
4469 uint64_t umin;
4470 switch (INSTR (23, 22))
4471 {
4472 case 0:
4473 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4474 for (i = 1; i < (full ? 16 : 8); i++)
4475 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4476 break;
4477 case 1:
4478 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4479 for (i = 1; i < (full ? 8 : 4); i++)
4480 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4481 break;
4482 case 2:
4483 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4484 for (i = 1; i < (full ? 4 : 2); i++)
4485 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4486 break;
4487
4488 case 3:
4489 HALT_UNALLOC;
4490 }
4491 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4492 return;
4493 }
4494 }
4495 }
4496
4497 static void
4498 do_vec_fminmaxV (sim_cpu *cpu)
4499 {
4500 /* instr[31,24] = 0110 1110
4501 instr[23] = max(0)/min(1)
4502 instr[22,14] = 011 0000 11
4503 instr[13,12] = nm(00)/normal(11)
4504 instr[11,10] = 10
4505 instr[9,5] = V source
4506 instr[4,0] = R dest. */
4507
4508 unsigned vs = INSTR (9, 5);
4509 unsigned rd = INSTR (4, 0);
4510 unsigned i;
4511 float res = aarch64_get_vec_float (cpu, vs, 0);
4512
4513 NYI_assert (31, 24, 0x6E);
4514 NYI_assert (22, 14, 0x0C3);
4515 NYI_assert (11, 10, 2);
4516
4517 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4518 if (INSTR (23, 23))
4519 {
4520 switch (INSTR (13, 12))
4521 {
4522 	case 0: /* FMINNMV. */
4523 for (i = 1; i < 4; i++)
4524 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4525 break;
4526
4527 case 3: /* FMINV. */
4528 for (i = 1; i < 4; i++)
4529 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4530 break;
4531
4532 default:
4533 HALT_NYI;
4534 }
4535 }
4536 else
4537 {
4538 switch (INSTR (13, 12))
4539 {
4540 	case 0: /* FMAXNMV. */
4541 for (i = 1; i < 4; i++)
4542 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4543 break;
4544
4545 case 3: /* FMAXV. */
4546 for (i = 1; i < 4; i++)
4547 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4548 break;
4549
4550 default:
4551 HALT_NYI;
4552 }
4553 }
4554
4555 aarch64_set_FP_float (cpu, rd, res);
4556 }
4557
4558 static void
4559 do_vec_Fminmax (sim_cpu *cpu)
4560 {
4561 /* instr[31] = 0
4562 instr[30] = half(0)/full(1)
4563 instr[29,24] = 00 1110
4564 instr[23] = max(0)/min(1)
4565 instr[22] = float(0)/double(1)
4566 instr[21] = 1
4567 instr[20,16] = Vm
4568 instr[15,14] = 11
4569 instr[13,12] = nm(00)/normal(11)
4570 instr[11,10] = 01
4571 instr[9,5] = Vn
4572 instr[4,0] = Vd. */
4573
4574 unsigned vm = INSTR (20, 16);
4575 unsigned vn = INSTR (9, 5);
4576 unsigned vd = INSTR (4, 0);
4577 unsigned full = INSTR (30, 30);
4578 unsigned min = INSTR (23, 23);
4579 unsigned i;
4580
4581 NYI_assert (29, 24, 0x0E);
4582 NYI_assert (21, 21, 1);
4583 NYI_assert (15, 14, 3);
4584 NYI_assert (11, 10, 1);
4585
4586 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4587 if (INSTR (22, 22))
4588 {
4589 double (* func)(double, double);
4590
4591 if (! full)
4592 HALT_NYI;
4593
4594 if (INSTR (13, 12) == 0)
4595 func = min ? dminnm : dmaxnm;
4596 else if (INSTR (13, 12) == 3)
4597 func = min ? fmin : fmax;
4598 else
4599 HALT_NYI;
4600
4601 for (i = 0; i < 2; i++)
4602 aarch64_set_vec_double (cpu, vd, i,
4603 func (aarch64_get_vec_double (cpu, vn, i),
4604 aarch64_get_vec_double (cpu, vm, i)));
4605 }
4606 else
4607 {
4608 float (* func)(float, float);
4609
4610 if (INSTR (13, 12) == 0)
4611 func = min ? fminnm : fmaxnm;
4612 else if (INSTR (13, 12) == 3)
4613 func = min ? fminf : fmaxf;
4614 else
4615 HALT_NYI;
4616
4617 for (i = 0; i < (full ? 4 : 2); i++)
4618 aarch64_set_vec_float (cpu, vd, i,
4619 func (aarch64_get_vec_float (cpu, vn, i),
4620 aarch64_get_vec_float (cpu, vm, i)));
4621 }
4622 }
4623
4624 static void
4625 do_vec_SCVTF (sim_cpu *cpu)
4626 {
4627 /* instr[31] = 0
4628 instr[30] = Q
4629 instr[29,23] = 00 1110 0
4630 instr[22] = float(0)/double(1)
4631 instr[21,10] = 10 0001 1101 10
4632 instr[9,5] = Vn
4633 instr[4,0] = Vd. */
4634
4635 unsigned vn = INSTR (9, 5);
4636 unsigned vd = INSTR (4, 0);
4637 unsigned full = INSTR (30, 30);
4638 unsigned size = INSTR (22, 22);
4639 unsigned i;
4640
4641 NYI_assert (29, 23, 0x1C);
4642 NYI_assert (21, 10, 0x876);
4643
4644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4645 if (size)
4646 {
4647 if (! full)
4648 HALT_UNALLOC;
4649
4650 for (i = 0; i < 2; i++)
4651 {
4652 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4653 aarch64_set_vec_double (cpu, vd, i, val);
4654 }
4655 }
4656 else
4657 {
4658 for (i = 0; i < (full ? 4 : 2); i++)
4659 {
4660 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4661 aarch64_set_vec_float (cpu, vd, i, val);
4662 }
4663 }
4664 }
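
/* SCVTF converts signed integer lanes (bit 29 is 0 in this encoding),
   so the lanes above must be read through the signed accessors before
   conversion.  */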
4665
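/* The vector compare macros below produce a per-lane mask rather than a
   boolean: every bit of a result lane is set when the comparison holds
   (hence the -1 stores) and cleared otherwise.  */
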
4666 #define VEC_CMP(SOURCE, CMP) \
4667 do \
4668 { \
4669 switch (size) \
4670 { \
4671 case 0: \
4672 for (i = 0; i < (full ? 16 : 8); i++) \
4673 aarch64_set_vec_u8 (cpu, vd, i, \
4674 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4675 CMP \
4676 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4677 ? -1 : 0); \
4678 return; \
4679 case 1: \
4680 for (i = 0; i < (full ? 8 : 4); i++) \
4681 aarch64_set_vec_u16 (cpu, vd, i, \
4682 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4683 CMP \
4684 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4685 ? -1 : 0); \
4686 return; \
4687 case 2: \
4688 for (i = 0; i < (full ? 4 : 2); i++) \
4689 aarch64_set_vec_u32 (cpu, vd, i, \
4690 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4691 CMP \
4692 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4693 ? -1 : 0); \
4694 return; \
4695 case 3: \
4696 if (! full) \
4697 HALT_UNALLOC; \
4698 for (i = 0; i < 2; i++) \
4699 aarch64_set_vec_u64 (cpu, vd, i, \
4700 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4701 CMP \
4702 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4703 ? -1ULL : 0); \
4704 return; \
4705 } \
4706 } \
4707 while (0)
4708
4709 #define VEC_CMP0(SOURCE, CMP) \
4710 do \
4711 { \
4712 switch (size) \
4713 { \
4714 case 0: \
4715 for (i = 0; i < (full ? 16 : 8); i++) \
4716 aarch64_set_vec_u8 (cpu, vd, i, \
4717 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4718 CMP 0 ? -1 : 0); \
4719 return; \
4720 case 1: \
4721 for (i = 0; i < (full ? 8 : 4); i++) \
4722 aarch64_set_vec_u16 (cpu, vd, i, \
4723 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4724 CMP 0 ? -1 : 0); \
4725 return; \
4726 case 2: \
4727 for (i = 0; i < (full ? 4 : 2); i++) \
4728 aarch64_set_vec_u32 (cpu, vd, i, \
4729 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4730 CMP 0 ? -1 : 0); \
4731 return; \
4732 case 3: \
4733 if (! full) \
4734 HALT_UNALLOC; \
4735 for (i = 0; i < 2; i++) \
4736 aarch64_set_vec_u64 (cpu, vd, i, \
4737 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4738 CMP 0 ? -1ULL : 0); \
4739 return; \
4740 } \
4741 } \
4742 while (0)
4743
4744 #define VEC_FCMP0(CMP) \
4745 do \
4746 { \
4747 if (vm != 0) \
4748 HALT_NYI; \
4749 if (INSTR (22, 22)) \
4750 { \
4751 if (! full) \
4752 HALT_NYI; \
4753 for (i = 0; i < 2; i++) \
4754 aarch64_set_vec_u64 (cpu, vd, i, \
4755 aarch64_get_vec_double (cpu, vn, i) \
4756 CMP 0.0 ? -1 : 0); \
4757 } \
4758 else \
4759 { \
4760 for (i = 0; i < (full ? 4 : 2); i++) \
4761 aarch64_set_vec_u32 (cpu, vd, i, \
4762 aarch64_get_vec_float (cpu, vn, i) \
4763 CMP 0.0 ? -1 : 0); \
4764 } \
4765 return; \
4766 } \
4767 while (0)
4768
4769 #define VEC_FCMP(CMP) \
4770 do \
4771 { \
4772 if (INSTR (22, 22)) \
4773 { \
4774 if (! full) \
4775 HALT_NYI; \
4776 for (i = 0; i < 2; i++) \
4777 aarch64_set_vec_u64 (cpu, vd, i, \
4778 aarch64_get_vec_double (cpu, vn, i) \
4779 CMP \
4780 aarch64_get_vec_double (cpu, vm, i) \
4781 ? -1 : 0); \
4782 } \
4783 else \
4784 { \
4785 for (i = 0; i < (full ? 4 : 2); i++) \
4786 aarch64_set_vec_u32 (cpu, vd, i, \
4787 aarch64_get_vec_float (cpu, vn, i) \
4788 CMP \
4789 aarch64_get_vec_float (cpu, vm, i) \
4790 ? -1 : 0); \
4791 } \
4792 return; \
4793 } \
4794 while (0)
4795
4796 static void
4797 do_vec_compare (sim_cpu *cpu)
4798 {
4799 /* instr[31] = 0
4800 instr[30] = half(0)/full(1)
4801 instr[29] = part-of-comparison-type
4802 instr[28,24] = 0 1110
4803 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4804                    instr[22] = type of float compares: single (0) / double (1)
4805 instr[21] = 1
4806 instr[20,16] = Vm or 00000 (compare vs 0)
4807 instr[15,10] = part-of-comparison-type
4808 instr[9,5] = Vn
4809 instr[4,0] = Vd. */
4810
4811 int full = INSTR (30, 30);
4812 int size = INSTR (23, 22);
4813 unsigned vm = INSTR (20, 16);
4814 unsigned vn = INSTR (9, 5);
4815 unsigned vd = INSTR (4, 0);
4816 unsigned i;
4817
4818 NYI_assert (28, 24, 0x0E);
4819 NYI_assert (21, 21, 1);
4820
4821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4822 if ((INSTR (11, 11)
4823 && INSTR (14, 14))
4824 || ((INSTR (11, 11) == 0
4825 && INSTR (10, 10) == 0)))
4826 {
4827 /* A compare vs 0. */
4828 if (vm != 0)
4829 {
4830 if (INSTR (15, 10) == 0x2A)
4831 do_vec_maxv (cpu);
4832 else if (INSTR (15, 10) == 0x32
4833 || INSTR (15, 10) == 0x3E)
4834 do_vec_fminmaxV (cpu);
4835 else if (INSTR (29, 23) == 0x1C
4836 && INSTR (21, 10) == 0x876)
4837 do_vec_SCVTF (cpu);
4838 else
4839 HALT_NYI;
4840 return;
4841 }
4842 }
4843
4844 if (INSTR (14, 14))
4845 {
4846 /* A floating point compare. */
4847 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4848 | INSTR (13, 10);
4849
4850 NYI_assert (15, 15, 1);
4851
4852 switch (decode)
4853 {
4854 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4855 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4856 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4857 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4858 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4859 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4860 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4861 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4862
4863 default:
4864 HALT_NYI;
4865 }
4866 }
4867 else
4868 {
4869 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4870
4871 switch (decode)
4872 {
4873 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4874 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4875 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4876 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4877 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4878 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4879 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4880 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4881 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4882 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4883 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4884 default:
4885 if (vm == 0)
4886 HALT_NYI;
4887 do_vec_maxv (cpu);
4888 }
4889 }
4890 }
4891
4892 static void
4893 do_vec_SSHL (sim_cpu *cpu)
4894 {
4895 /* instr[31] = 0
4896 instr[30] = first part (0)/ second part (1)
4897 instr[29,24] = 00 1110
4898 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4899 instr[21] = 1
4900 instr[20,16] = Vm
4901 instr[15,10] = 0100 01
4902 instr[9,5] = Vn
4903 instr[4,0] = Vd. */
4904
4905 unsigned full = INSTR (30, 30);
4906 unsigned vm = INSTR (20, 16);
4907 unsigned vn = INSTR (9, 5);
4908 unsigned vd = INSTR (4, 0);
4909 unsigned i;
4910 signed int shift;
4911
4912 NYI_assert (29, 24, 0x0E);
4913 NYI_assert (21, 21, 1);
4914 NYI_assert (15, 10, 0x11);
4915
4916   /* SSHL: a negative shift amount in an element of Vm selects a right
     (arithmetic) shift of the corresponding Vn element, as handled below.  */
4917
4918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4919 switch (INSTR (23, 22))
4920 {
4921 case 0:
4922 for (i = 0; i < (full ? 16 : 8); i++)
4923 {
4924 shift = aarch64_get_vec_s8 (cpu, vm, i);
4925 if (shift >= 0)
4926 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4927 << shift);
4928 else
4929 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4930 >> - shift);
4931 }
4932 return;
4933
4934 case 1:
4935 for (i = 0; i < (full ? 8 : 4); i++)
4936 {
4937 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4938 if (shift >= 0)
4939 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4940 << shift);
4941 else
4942 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4943 >> - shift);
4944 }
4945 return;
4946
4947 case 2:
4948 for (i = 0; i < (full ? 4 : 2); i++)
4949 {
4950 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4951 if (shift >= 0)
4952 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4953 << shift);
4954 else
4955 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4956 >> - shift);
4957 }
4958 return;
4959
4960 case 3:
4961 if (! full)
4962 HALT_UNALLOC;
4963 for (i = 0; i < 2; i++)
4964 {
4965 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4966 if (shift >= 0)
4967 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4968 << shift);
4969 else
4970 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4971 >> - shift);
4972 }
4973 return;
4974 }
4975 }
4976
4977 static void
4978 do_vec_USHL (sim_cpu *cpu)
4979 {
4980 /* instr[31] = 0
4981 instr[30] = first part (0)/ second part (1)
4982 instr[29,24] = 10 1110
4983 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4984 instr[21] = 1
4985 instr[20,16] = Vm
4986 instr[15,10] = 0100 01
4987 instr[9,5] = Vn
4988 instr[4,0] = Vd */
4989
4990 unsigned full = INSTR (30, 30);
4991 unsigned vm = INSTR (20, 16);
4992 unsigned vn = INSTR (9, 5);
4993 unsigned vd = INSTR (4, 0);
4994 unsigned i;
4995 signed int shift;
4996
4997 NYI_assert (29, 24, 0x2E);
4998 NYI_assert (15, 10, 0x11);
4999
5000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5001 switch (INSTR (23, 22))
5002 {
5003 case 0:
5004 for (i = 0; i < (full ? 16 : 8); i++)
5005 {
5006 shift = aarch64_get_vec_s8 (cpu, vm, i);
5007 if (shift >= 0)
5008 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5009 << shift);
5010 else
5011 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5012 >> - shift);
5013 }
5014 return;
5015
5016 case 1:
5017 for (i = 0; i < (full ? 8 : 4); i++)
5018 {
5019 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5020 if (shift >= 0)
5021 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5022 << shift);
5023 else
5024 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5025 >> - shift);
5026 }
5027 return;
5028
5029 case 2:
5030 for (i = 0; i < (full ? 4 : 2); i++)
5031 {
5032 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5033 if (shift >= 0)
5034 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5035 << shift);
5036 else
5037 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5038 >> - shift);
5039 }
5040 return;
5041
5042 case 3:
5043 if (! full)
5044 HALT_UNALLOC;
5045 for (i = 0; i < 2; i++)
5046 {
5047 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5048 if (shift >= 0)
5049 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5050 << shift);
5051 else
5052 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5053 >> - shift);
5054 }
5055 return;
5056 }
5057 }
5058
5059 static void
5060 do_vec_FMLA (sim_cpu *cpu)
5061 {
5062 /* instr[31] = 0
5063 instr[30] = full/half selector
5064 instr[29,23] = 0011100
5065 instr[22] = size: 0=>float, 1=>double
5066 instr[21] = 1
5067 instr[20,16] = Vm
5068 instr[15,10] = 1100 11
5069 instr[9,5] = Vn
5070 instr[4,0] = Vd. */
5071
5072 unsigned vm = INSTR (20, 16);
5073 unsigned vn = INSTR (9, 5);
5074 unsigned vd = INSTR (4, 0);
5075 unsigned i;
5076 int full = INSTR (30, 30);
5077
5078 NYI_assert (29, 23, 0x1C);
5079 NYI_assert (21, 21, 1);
5080 NYI_assert (15, 10, 0x33);
5081
5082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5083 if (INSTR (22, 22))
5084 {
5085 if (! full)
5086 HALT_UNALLOC;
5087 for (i = 0; i < 2; i++)
5088 aarch64_set_vec_double (cpu, vd, i,
5089 aarch64_get_vec_double (cpu, vn, i) *
5090 aarch64_get_vec_double (cpu, vm, i) +
5091 aarch64_get_vec_double (cpu, vd, i));
5092 }
5093 else
5094 {
5095 for (i = 0; i < (full ? 4 : 2); i++)
5096 aarch64_set_vec_float (cpu, vd, i,
5097 aarch64_get_vec_float (cpu, vn, i) *
5098 aarch64_get_vec_float (cpu, vm, i) +
5099 aarch64_get_vec_float (cpu, vd, i));
5100 }
5101 }
5102
5103 static void
5104 do_vec_max (sim_cpu *cpu)
5105 {
5106 /* instr[31] = 0
5107 instr[30] = full/half selector
5108 instr[29] = SMAX (0) / UMAX (1)
5109 instr[28,24] = 0 1110
5110 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5111 instr[21] = 1
5112 instr[20,16] = Vm
5113 instr[15,10] = 0110 01
5114 instr[9,5] = Vn
5115 instr[4,0] = Vd. */
5116
5117 unsigned vm = INSTR (20, 16);
5118 unsigned vn = INSTR (9, 5);
5119 unsigned vd = INSTR (4, 0);
5120 unsigned i;
5121 int full = INSTR (30, 30);
5122
5123 NYI_assert (28, 24, 0x0E);
5124 NYI_assert (21, 21, 1);
5125 NYI_assert (15, 10, 0x19);
5126
5127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5128 if (INSTR (29, 29))
5129 {
5130 switch (INSTR (23, 22))
5131 {
5132 case 0:
5133 for (i = 0; i < (full ? 16 : 8); i++)
5134 aarch64_set_vec_u8 (cpu, vd, i,
5135 aarch64_get_vec_u8 (cpu, vn, i)
5136 > aarch64_get_vec_u8 (cpu, vm, i)
5137 ? aarch64_get_vec_u8 (cpu, vn, i)
5138 : aarch64_get_vec_u8 (cpu, vm, i));
5139 return;
5140
5141 case 1:
5142 for (i = 0; i < (full ? 8 : 4); i++)
5143 aarch64_set_vec_u16 (cpu, vd, i,
5144 aarch64_get_vec_u16 (cpu, vn, i)
5145 > aarch64_get_vec_u16 (cpu, vm, i)
5146 ? aarch64_get_vec_u16 (cpu, vn, i)
5147 : aarch64_get_vec_u16 (cpu, vm, i));
5148 return;
5149
5150 case 2:
5151 for (i = 0; i < (full ? 4 : 2); i++)
5152 aarch64_set_vec_u32 (cpu, vd, i,
5153 aarch64_get_vec_u32 (cpu, vn, i)
5154 > aarch64_get_vec_u32 (cpu, vm, i)
5155 ? aarch64_get_vec_u32 (cpu, vn, i)
5156 : aarch64_get_vec_u32 (cpu, vm, i));
5157 return;
5158
5159 case 3:
5160 HALT_UNALLOC;
5161 }
5162 }
5163 else
5164 {
5165 switch (INSTR (23, 22))
5166 {
5167 case 0:
5168 for (i = 0; i < (full ? 16 : 8); i++)
5169 aarch64_set_vec_s8 (cpu, vd, i,
5170 aarch64_get_vec_s8 (cpu, vn, i)
5171 > aarch64_get_vec_s8 (cpu, vm, i)
5172 ? aarch64_get_vec_s8 (cpu, vn, i)
5173 : aarch64_get_vec_s8 (cpu, vm, i));
5174 return;
5175
5176 case 1:
5177 for (i = 0; i < (full ? 8 : 4); i++)
5178 aarch64_set_vec_s16 (cpu, vd, i,
5179 aarch64_get_vec_s16 (cpu, vn, i)
5180 > aarch64_get_vec_s16 (cpu, vm, i)
5181 ? aarch64_get_vec_s16 (cpu, vn, i)
5182 : aarch64_get_vec_s16 (cpu, vm, i));
5183 return;
5184
5185 case 2:
5186 for (i = 0; i < (full ? 4 : 2); i++)
5187 aarch64_set_vec_s32 (cpu, vd, i,
5188 aarch64_get_vec_s32 (cpu, vn, i)
5189 > aarch64_get_vec_s32 (cpu, vm, i)
5190 ? aarch64_get_vec_s32 (cpu, vn, i)
5191 : aarch64_get_vec_s32 (cpu, vm, i));
5192 return;
5193
5194 case 3:
5195 HALT_UNALLOC;
5196 }
5197 }
5198 }
5199
5200 static void
5201 do_vec_min (sim_cpu *cpu)
5202 {
5203 /* instr[31] = 0
5204 instr[30] = full/half selector
5205 instr[29] = SMIN (0) / UMIN (1)
5206 instr[28,24] = 0 1110
5207 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5208 instr[21] = 1
5209 instr[20,16] = Vm
5210 instr[15,10] = 0110 11
5211 instr[9,5] = Vn
5212 instr[4,0] = Vd. */
5213
5214 unsigned vm = INSTR (20, 16);
5215 unsigned vn = INSTR (9, 5);
5216 unsigned vd = INSTR (4, 0);
5217 unsigned i;
5218 int full = INSTR (30, 30);
5219
5220 NYI_assert (28, 24, 0x0E);
5221 NYI_assert (21, 21, 1);
5222 NYI_assert (15, 10, 0x1B);
5223
5224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5225 if (INSTR (29, 29))
5226 {
5227 switch (INSTR (23, 22))
5228 {
5229 case 0:
5230 for (i = 0; i < (full ? 16 : 8); i++)
5231 aarch64_set_vec_u8 (cpu, vd, i,
5232 aarch64_get_vec_u8 (cpu, vn, i)
5233 < aarch64_get_vec_u8 (cpu, vm, i)
5234 ? aarch64_get_vec_u8 (cpu, vn, i)
5235 : aarch64_get_vec_u8 (cpu, vm, i));
5236 return;
5237
5238 case 1:
5239 for (i = 0; i < (full ? 8 : 4); i++)
5240 aarch64_set_vec_u16 (cpu, vd, i,
5241 aarch64_get_vec_u16 (cpu, vn, i)
5242 < aarch64_get_vec_u16 (cpu, vm, i)
5243 ? aarch64_get_vec_u16 (cpu, vn, i)
5244 : aarch64_get_vec_u16 (cpu, vm, i));
5245 return;
5246
5247 case 2:
5248 for (i = 0; i < (full ? 4 : 2); i++)
5249 aarch64_set_vec_u32 (cpu, vd, i,
5250 aarch64_get_vec_u32 (cpu, vn, i)
5251 < aarch64_get_vec_u32 (cpu, vm, i)
5252 ? aarch64_get_vec_u32 (cpu, vn, i)
5253 : aarch64_get_vec_u32 (cpu, vm, i));
5254 return;
5255
5256 case 3:
5257 HALT_UNALLOC;
5258 }
5259 }
5260 else
5261 {
5262 switch (INSTR (23, 22))
5263 {
5264 case 0:
5265 for (i = 0; i < (full ? 16 : 8); i++)
5266 aarch64_set_vec_s8 (cpu, vd, i,
5267 aarch64_get_vec_s8 (cpu, vn, i)
5268 < aarch64_get_vec_s8 (cpu, vm, i)
5269 ? aarch64_get_vec_s8 (cpu, vn, i)
5270 : aarch64_get_vec_s8 (cpu, vm, i));
5271 return;
5272
5273 case 1:
5274 for (i = 0; i < (full ? 8 : 4); i++)
5275 aarch64_set_vec_s16 (cpu, vd, i,
5276 aarch64_get_vec_s16 (cpu, vn, i)
5277 < aarch64_get_vec_s16 (cpu, vm, i)
5278 ? aarch64_get_vec_s16 (cpu, vn, i)
5279 : aarch64_get_vec_s16 (cpu, vm, i));
5280 return;
5281
5282 case 2:
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_s32 (cpu, vd, i,
5285 aarch64_get_vec_s32 (cpu, vn, i)
5286 < aarch64_get_vec_s32 (cpu, vm, i)
5287 ? aarch64_get_vec_s32 (cpu, vn, i)
5288 : aarch64_get_vec_s32 (cpu, vm, i));
5289 return;
5290
5291 case 3:
5292 HALT_UNALLOC;
5293 }
5294 }
5295 }
5296
5297 static void
5298 do_vec_sub_long (sim_cpu *cpu)
5299 {
5300 /* instr[31] = 0
5301 instr[30] = lower (0) / upper (1)
5302 instr[29] = signed (0) / unsigned (1)
5303 instr[28,24] = 0 1110
5304 instr[23,22] = size: bytes (00), half (01), word (10)
5305 instr[21] = 1
5306     instr[20,16] = Vm
5307 instr[15,10] = 0010 00
5308 instr[9,5] = Vn
5309 instr[4,0] = V dest. */
5310
5311 unsigned size = INSTR (23, 22);
5312 unsigned vm = INSTR (20, 16);
5313 unsigned vn = INSTR (9, 5);
5314 unsigned vd = INSTR (4, 0);
5315 unsigned bias = 0;
5316 unsigned i;
5317
5318 NYI_assert (28, 24, 0x0E);
5319 NYI_assert (21, 21, 1);
5320 NYI_assert (15, 10, 0x08);
5321
5322 if (size == 3)
5323 HALT_UNALLOC;
5324
5325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5326 switch (INSTR (30, 29))
5327 {
5328 case 2: /* SSUBL2. */
5329       bias = 2;	/* Fall through.  */
5330 case 0: /* SSUBL. */
5331 switch (size)
5332 {
5333 case 0:
5334 	  bias *= 4;	/* For the '2' variant the upper half starts at byte 8.  */
5335 for (i = 0; i < 8; i++)
5336 aarch64_set_vec_s16 (cpu, vd, i,
5337 aarch64_get_vec_s8 (cpu, vn, i + bias)
5338 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5339 break;
5340
5341 case 1:
5342 bias *= 2;
5343 for (i = 0; i < 4; i++)
5344 aarch64_set_vec_s32 (cpu, vd, i,
5345 aarch64_get_vec_s16 (cpu, vn, i + bias)
5346 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5347 break;
5348
5349 case 2:
5350 for (i = 0; i < 2; i++)
5351 aarch64_set_vec_s64 (cpu, vd, i,
5352 aarch64_get_vec_s32 (cpu, vn, i + bias)
5353 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5354 break;
5355
5356 default:
5357 HALT_UNALLOC;
5358 }
5359 break;
5360
5361 case 3: /* USUBL2. */
5362       bias = 2;	/* Fall through.  */
5363 case 1: /* USUBL. */
5364 switch (size)
5365 {
5366 case 0:
5367 	  bias *= 4;	/* For the '2' variant the upper half starts at byte 8.  */
5368 for (i = 0; i < 8; i++)
5369 aarch64_set_vec_u16 (cpu, vd, i,
5370 aarch64_get_vec_u8 (cpu, vn, i + bias)
5371 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5372 break;
5373
5374 case 1:
5375 bias *= 2;
5376 for (i = 0; i < 4; i++)
5377 aarch64_set_vec_u32 (cpu, vd, i,
5378 aarch64_get_vec_u16 (cpu, vn, i + bias)
5379 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5380 break;
5381
5382 case 2:
5383 for (i = 0; i < 2; i++)
5384 aarch64_set_vec_u64 (cpu, vd, i,
5385 aarch64_get_vec_u32 (cpu, vn, i + bias)
5386 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5387 break;
5388
5389 default:
5390 HALT_UNALLOC;
5391 }
5392 break;
5393 }
5394 }
5395
5396 static void
5397 do_vec_ADDP (sim_cpu *cpu)
5398 {
5399 /* instr[31] = 0
5400 instr[30] = half(0)/full(1)
5401 instr[29,24] = 00 1110
5402 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5403 instr[21] = 1
5404     instr[20,16] = Vm
5405 instr[15,10] = 1011 11
5406 instr[9,5] = Vn
5407 instr[4,0] = V dest. */
5408
5409 FRegister copy_vn;
5410 FRegister copy_vm;
5411 unsigned full = INSTR (30, 30);
5412 unsigned size = INSTR (23, 22);
5413 unsigned vm = INSTR (20, 16);
5414 unsigned vn = INSTR (9, 5);
5415 unsigned vd = INSTR (4, 0);
5416 unsigned i, range;
5417
5418 NYI_assert (29, 24, 0x0E);
5419 NYI_assert (21, 21, 1);
5420 NYI_assert (15, 10, 0x2F);
5421
5422 /* Make copies of the source registers in case vd == vn/vm. */
5423 copy_vn = cpu->fr[vn];
5424 copy_vm = cpu->fr[vm];
5425
5426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5427 switch (size)
5428 {
5429 case 0:
5430 range = full ? 8 : 4;
5431 for (i = 0; i < range; i++)
5432 {
5433 aarch64_set_vec_u8 (cpu, vd, i,
5434 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5435 aarch64_set_vec_u8 (cpu, vd, i + range,
5436 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5437 }
5438 return;
5439
5440 case 1:
5441 range = full ? 4 : 2;
5442 for (i = 0; i < range; i++)
5443 {
5444 aarch64_set_vec_u16 (cpu, vd, i,
5445 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5446 aarch64_set_vec_u16 (cpu, vd, i + range,
5447 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5448 }
5449 return;
5450
5451 case 2:
5452 range = full ? 2 : 1;
5453 for (i = 0; i < range; i++)
5454 {
5455 aarch64_set_vec_u32 (cpu, vd, i,
5456 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5457 aarch64_set_vec_u32 (cpu, vd, i + range,
5458 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5459 }
5460 return;
5461
5462 case 3:
5463 if (! full)
5464 HALT_UNALLOC;
5465 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5466 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5467 return;
5468 }
5469 }
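
/* For example, with the 4S arrangement the result is
   vd = { vn[0]+vn[1], vn[2]+vn[3], vm[0]+vm[1], vm[2]+vm[3] };
   the source copies above keep this correct when Vd aliases Vn or Vm.  */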
5470
5471 static void
5472 do_vec_FABS (sim_cpu *cpu)
5473 {
5474 /* instr[31] = 0
5475 instr[30] = half(0)/full(1)
5476 instr[29,23] = 00 1110 1
5477 instr[22] = float(0)/double(1)
5478 instr[21,16] = 10 0000
5479 instr[15,10] = 1111 10
5480 instr[9,5] = Vn
5481 instr[4,0] = Vd. */
5482
5483 unsigned vn = INSTR (9, 5);
5484 unsigned vd = INSTR (4, 0);
5485 unsigned full = INSTR (30, 30);
5486 unsigned i;
5487
5488 NYI_assert (29, 23, 0x1D);
5489 NYI_assert (21, 10, 0x83E);
5490
5491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5492 if (INSTR (22, 22))
5493 {
5494 if (! full)
5495 HALT_NYI;
5496
5497 for (i = 0; i < 2; i++)
5498 aarch64_set_vec_double (cpu, vd, i,
5499 fabs (aarch64_get_vec_double (cpu, vn, i)));
5500 }
5501 else
5502 {
5503 for (i = 0; i < (full ? 4 : 2); i++)
5504 aarch64_set_vec_float (cpu, vd, i,
5505 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5506 }
5507 }
5508
5509 static void
5510 do_vec_FCVTZS (sim_cpu *cpu)
5511 {
5512 /* instr[31] = 0
5513 instr[30] = half (0) / all (1)
5514 instr[29,23] = 00 1110 1
5515 instr[22] = single (0) / double (1)
5516 instr[21,10] = 10 0001 1011 10
5517 instr[9,5] = Rn
5518 instr[4,0] = Rd. */
5519
5520 unsigned rn = INSTR (9, 5);
5521 unsigned rd = INSTR (4, 0);
5522 unsigned full = INSTR (30, 30);
5523 unsigned i;
5524
5525 NYI_assert (31, 31, 0);
5526 NYI_assert (29, 23, 0x1D);
5527 NYI_assert (21, 10, 0x86E);
5528
5529 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5530 if (INSTR (22, 22))
5531 {
5532 if (! full)
5533 HALT_UNALLOC;
5534
5535 for (i = 0; i < 2; i++)
5536 aarch64_set_vec_s64 (cpu, rd, i,
5537 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5538 }
5539 else
5540 for (i = 0; i < (full ? 4 : 2); i++)
5541 aarch64_set_vec_s32 (cpu, rd, i,
5542 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5543 }
5544
5545 static void
5546 do_vec_REV64 (sim_cpu *cpu)
5547 {
5548 /* instr[31] = 0
5549 instr[30] = full/half
5550 instr[29,24] = 00 1110
5551 instr[23,22] = size
5552 instr[21,10] = 10 0000 0000 10
5553 instr[9,5] = Rn
5554 instr[4,0] = Rd. */
5555
5556 unsigned rn = INSTR (9, 5);
5557 unsigned rd = INSTR (4, 0);
5558 unsigned size = INSTR (23, 22);
5559 unsigned full = INSTR (30, 30);
5560 unsigned i;
5561 FRegister val;
5562
5563 NYI_assert (29, 24, 0x0E);
5564 NYI_assert (21, 10, 0x802);
5565
5566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5567 switch (size)
5568 {
5569 case 0:
5570 for (i = 0; i < (full ? 16 : 8); i++)
5571 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5572 break;
5573
5574 case 1:
5575 for (i = 0; i < (full ? 8 : 4); i++)
5576 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5577 break;
5578
5579 case 2:
5580 for (i = 0; i < (full ? 4 : 2); i++)
5581 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5582 break;
5583
5584 case 3:
5585 HALT_UNALLOC;
5586 }
5587
5588 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5589 if (full)
5590 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5591 }
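
/* Reversing the elements within each 64-bit doubleword is just an index
   XOR: bytes use i ^ 7, halves i ^ 3 and words i ^ 1, as above.  */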
5592
5593 static void
5594 do_vec_REV16 (sim_cpu *cpu)
5595 {
5596 /* instr[31] = 0
5597 instr[30] = full/half
5598 instr[29,24] = 00 1110
5599 instr[23,22] = size
5600 instr[21,10] = 10 0000 0001 10
5601 instr[9,5] = Rn
5602 instr[4,0] = Rd. */
5603
5604 unsigned rn = INSTR (9, 5);
5605 unsigned rd = INSTR (4, 0);
5606 unsigned size = INSTR (23, 22);
5607 unsigned full = INSTR (30, 30);
5608 unsigned i;
5609 FRegister val;
5610
5611 NYI_assert (29, 24, 0x0E);
5612 NYI_assert (21, 10, 0x806);
5613
5614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5615 switch (size)
5616 {
5617 case 0:
5618 for (i = 0; i < (full ? 16 : 8); i++)
5619 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5620 break;
5621
5622 default:
5623 HALT_UNALLOC;
5624 }
5625
5626 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5627 if (full)
5628 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5629 }
5630
5631 static void
5632 do_vec_op1 (sim_cpu *cpu)
5633 {
5634 /* instr[31] = 0
5635 instr[30] = half/full
5636 instr[29,24] = 00 1110
5637 instr[23,21] = ???
5638 instr[20,16] = Vm
5639 instr[15,10] = sub-opcode
5640 instr[9,5] = Vn
5641 instr[4,0] = Vd */
5642 NYI_assert (29, 24, 0x0E);
5643
5644 if (INSTR (21, 21) == 0)
5645 {
5646 if (INSTR (23, 22) == 0)
5647 {
5648 if (INSTR (30, 30) == 1
5649 && INSTR (17, 14) == 0
5650 && INSTR (12, 10) == 7)
5651 return do_vec_ins_2 (cpu);
5652
5653 switch (INSTR (15, 10))
5654 {
5655 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5656 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5657 case 0x07: do_vec_INS (cpu); return;
5658 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5659 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5660
5661 case 0x00:
5662 case 0x08:
5663 case 0x10:
5664 case 0x18:
5665 do_vec_TBL (cpu); return;
5666
5667 case 0x06:
5668 case 0x16:
5669 do_vec_UZP (cpu); return;
5670
5671 case 0x0A: do_vec_TRN (cpu); return;
5672
5673 case 0x0E:
5674 case 0x1E:
5675 do_vec_ZIP (cpu); return;
5676
5677 default:
5678 HALT_NYI;
5679 }
5680 }
5681
5682 switch (INSTR (13, 10))
5683 {
5684 case 0x6: do_vec_UZP (cpu); return;
5685 case 0xE: do_vec_ZIP (cpu); return;
5686 case 0xA: do_vec_TRN (cpu); return;
5687 default: HALT_NYI;
5688 }
5689 }
5690
5691 switch (INSTR (15, 10))
5692 {
5693 case 0x02: do_vec_REV64 (cpu); return;
5694 case 0x06: do_vec_REV16 (cpu); return;
5695
5696 case 0x07:
5697 switch (INSTR (23, 21))
5698 {
5699 case 1: do_vec_AND (cpu); return;
5700 case 3: do_vec_BIC (cpu); return;
5701 case 5: do_vec_ORR (cpu); return;
5702 case 7: do_vec_ORN (cpu); return;
5703 default: HALT_NYI;
5704 }
5705
5706 case 0x08: do_vec_sub_long (cpu); return;
5707 case 0x0a: do_vec_XTN (cpu); return;
5708 case 0x11: do_vec_SSHL (cpu); return;
5709 case 0x16: do_vec_CNT (cpu); return;
5710 case 0x19: do_vec_max (cpu); return;
5711 case 0x1B: do_vec_min (cpu); return;
5712 case 0x21: do_vec_add (cpu); return;
5713 case 0x25: do_vec_MLA (cpu); return;
5714 case 0x27: do_vec_mul (cpu); return;
5715 case 0x2F: do_vec_ADDP (cpu); return;
5716 case 0x30: do_vec_mull (cpu); return;
5717 case 0x33: do_vec_FMLA (cpu); return;
5718 case 0x35: do_vec_fadd (cpu); return;
5719
5720 case 0x2E:
5721 switch (INSTR (20, 16))
5722 {
5723 case 0x00: do_vec_ABS (cpu); return;
5724 case 0x01: do_vec_FCVTZS (cpu); return;
5725 case 0x11: do_vec_ADDV (cpu); return;
5726 default: HALT_NYI;
5727 }
5728
5729 case 0x31:
5730 case 0x3B:
5731 do_vec_Fminmax (cpu); return;
5732
5733 case 0x0D:
5734 case 0x0F:
5735 case 0x22:
5736 case 0x23:
5737 case 0x26:
5738 case 0x2A:
5739 case 0x32:
5740 case 0x36:
5741 case 0x39:
5742 case 0x3A:
5743 do_vec_compare (cpu); return;
5744
5745 case 0x3E:
5746 do_vec_FABS (cpu); return;
5747
5748 default:
5749 HALT_NYI;
5750 }
5751 }
5752
5753 static void
5754 do_vec_xtl (sim_cpu *cpu)
5755 {
5756 /* instr[31] = 0
5757 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5758 instr[28,22] = 0 1111 00
5759 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5760 instr[15,10] = 1010 01
5761 instr[9,5] = V source
5762 instr[4,0] = V dest. */
5763
5764 unsigned vs = INSTR (9, 5);
5765 unsigned vd = INSTR (4, 0);
5766 unsigned i, shift, bias = 0;
5767
5768 NYI_assert (28, 22, 0x3C);
5769 NYI_assert (15, 10, 0x29);
5770
5771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5772 switch (INSTR (30, 29))
5773 {
5774 case 2: /* SXTL2, SSHLL2. */
5775       bias = 2;	/* Fall through.  */
5776 case 0: /* SXTL, SSHLL. */
5777 if (INSTR (21, 21))
5778 {
5779 int64_t val1, val2;
5780
5781 shift = INSTR (20, 16);
5782 /* Get the source values before setting the destination values
5783 in case the source and destination are the same. */
5784 	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5785 	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5786 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5787 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5788 }
5789 else if (INSTR (20, 20))
5790 {
5791 int32_t v[4];
5793
5794 shift = INSTR (19, 16);
5795 bias *= 2;
5796 for (i = 0; i < 4; i++)
5797 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5798 for (i = 0; i < 4; i++)
5799 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5800 }
5801 else
5802 {
5803 int16_t v[8];
5804 NYI_assert (19, 19, 1);
5805
5806 shift = INSTR (18, 16);
5807 bias *= 4;
5808 for (i = 0; i < 8; i++)
5809 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5810 for (i = 0; i < 8; i++)
5811 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5812 }
5813 return;
5814
5815 case 3: /* UXTL2, USHLL2. */
5816       bias = 2;	/* Fall through.  */
5817 case 1: /* UXTL, USHLL. */
5818 if (INSTR (21, 21))
5819 {
5820 uint64_t v1, v2;
5821 shift = INSTR (20, 16);
5822 	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5823 	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5824 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5825 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5826 }
5827 else if (INSTR (20, 20))
5828 {
5829 uint32_t v[4];
5830 shift = INSTR (19, 16);
5831 bias *= 2;
5832 for (i = 0; i < 4; i++)
5833 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5834 for (i = 0; i < 4; i++)
5835 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5836 }
5837 else
5838 {
5839 uint16_t v[8];
5840 NYI_assert (19, 19, 1);
5841
5842 shift = INSTR (18, 16);
5843 bias *= 4;
5844 for (i = 0; i < 8; i++)
5845 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5846 for (i = 0; i < 8; i++)
5847 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5848 }
5849 return;
5850 }
5851 }
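
/* SXTL/UXTL are simply SSHLL/USHLL with a shift of zero: the position
   of the leading 1 in instr[21,16] selects the source lane size, and
   the bits below it give the left-shift amount applied while widening.  */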
5852
5853 static void
5854 do_vec_SHL (sim_cpu *cpu)
5855 {
5856 /* instr [31] = 0
5857 instr [30] = half(0)/full(1)
5858 instr [29,23] = 001 1110
5859 instr [22,16] = size and shift amount
5860 instr [15,10] = 01 0101
5861 instr [9, 5] = Vs
5862 instr [4, 0] = Vd. */
5863
5864 int shift;
5865 int full = INSTR (30, 30);
5866 unsigned vs = INSTR (9, 5);
5867 unsigned vd = INSTR (4, 0);
5868 unsigned i;
5869
5870 NYI_assert (29, 23, 0x1E);
5871 NYI_assert (15, 10, 0x15);
5872
5873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5874 if (INSTR (22, 22))
5875 {
5876 shift = INSTR (21, 16);
5877
5878 if (full == 0)
5879 HALT_UNALLOC;
5880
5881 for (i = 0; i < 2; i++)
5882 {
5883 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5884 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5885 }
5886
5887 return;
5888 }
5889
5890 if (INSTR (21, 21))
5891 {
5892 shift = INSTR (20, 16);
5893
5894 for (i = 0; i < (full ? 4 : 2); i++)
5895 {
5896 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5897 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5898 }
5899
5900 return;
5901 }
5902
5903 if (INSTR (20, 20))
5904 {
5905 shift = INSTR (19, 16);
5906
5907 for (i = 0; i < (full ? 8 : 4); i++)
5908 {
5909 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5910 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5911 }
5912
5913 return;
5914 }
5915
5916 if (INSTR (19, 19) == 0)
5917 HALT_UNALLOC;
5918
5919 shift = INSTR (18, 16);
5920
5921 for (i = 0; i < (full ? 16 : 8); i++)
5922 {
5923 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5924 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5925 }
5926 }
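
/* For these left shifts the architecture encodes the amount as
   immh:immb = esize + shift, so the amount is just the bits below the
   leading 1.  For example instr[22,16] = 0100011 (35) selects 32-bit
   lanes (bit 21 set) and a shift of 3 (35 - 32).  */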
5927
5928 static void
5929 do_vec_SSHR_USHR (sim_cpu *cpu)
5930 {
5931 /* instr [31] = 0
5932 instr [30] = half(0)/full(1)
5933 instr [29] = signed(0)/unsigned(1)
5934 instr [28,23] = 0 1111 0
5935 instr [22,16] = size and shift amount
5936 instr [15,10] = 0000 01
5937 instr [9, 5] = Vs
5938 instr [4, 0] = Vd. */
5939
5940 int full = INSTR (30, 30);
5941 int sign = ! INSTR (29, 29);
5942 unsigned shift = INSTR (22, 16);
5943 unsigned vs = INSTR (9, 5);
5944 unsigned vd = INSTR (4, 0);
5945 unsigned i;
5946
5947 NYI_assert (28, 23, 0x1E);
5948 NYI_assert (15, 10, 0x01);
5949
5950 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5951 if (INSTR (22, 22))
5952 {
5953 shift = 128 - shift;
5954
5955 if (full == 0)
5956 HALT_UNALLOC;
5957
5958 if (sign)
5959 for (i = 0; i < 2; i++)
5960 {
5961 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5962 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5963 }
5964 else
5965 for (i = 0; i < 2; i++)
5966 {
5967 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5968 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5969 }
5970
5971 return;
5972 }
5973
5974 if (INSTR (21, 21))
5975 {
5976 shift = 64 - shift;
5977
5978 if (sign)
5979 for (i = 0; i < (full ? 4 : 2); i++)
5980 {
5981 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5982 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5983 }
5984 else
5985 for (i = 0; i < (full ? 4 : 2); i++)
5986 {
5987 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5988 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5989 }
5990
5991 return;
5992 }
5993
5994 if (INSTR (20, 20))
5995 {
5996 shift = 32 - shift;
5997
5998 if (sign)
5999 for (i = 0; i < (full ? 8 : 4); i++)
6000 {
6001 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6002 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6003 }
6004 else
6005 for (i = 0; i < (full ? 8 : 4); i++)
6006 {
6007 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6008 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6009 }
6010
6011 return;
6012 }
6013
6014 if (INSTR (19, 19) == 0)
6015 HALT_UNALLOC;
6016
6017 shift = 16 - shift;
6018
6019 if (sign)
6020 for (i = 0; i < (full ? 16 : 8); i++)
6021 {
6022 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6023 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6024 }
6025 else
6026 for (i = 0; i < (full ? 16 : 8); i++)
6027 {
6028 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6029 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6030 }
6031 }
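
/* Right shifts are encoded as immh:immb = (2 * esize) - shift, hence
   the 128/64/32/16 adjustments above.  For example instr[22,16] =
   1001000 (72) selects 64-bit lanes and a shift of 128 - 72 = 56.  */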
6032
6033 static void
6034 do_vec_MUL_by_element (sim_cpu *cpu)
6035 {
6036 /* instr[31] = 0
6037 instr[30] = half/full
6038 instr[29,24] = 00 1111
6039 instr[23,22] = size
6040 instr[21] = L
6041 instr[20] = M
6042 instr[19,16] = m
6043 instr[15,12] = 1000
6044 instr[11] = H
6045 instr[10] = 0
6046 instr[9,5] = Vn
6047 instr[4,0] = Vd */
6048
6049 unsigned full = INSTR (30, 30);
6050 unsigned L = INSTR (21, 21);
6051 unsigned H = INSTR (11, 11);
6052 unsigned vn = INSTR (9, 5);
6053 unsigned vd = INSTR (4, 0);
6054 unsigned size = INSTR (23, 22);
6055 unsigned index;
6056 unsigned vm;
6057 unsigned e;
6058
6059 NYI_assert (29, 24, 0x0F);
6060 NYI_assert (15, 12, 0x8);
6061 NYI_assert (10, 10, 0);
6062
6063 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6064 switch (size)
6065 {
6066 case 1:
6067 {
6068 /* 16 bit products. */
6069 uint16_t product;
6070 uint16_t element1;
6071 uint16_t element2;
6072
6073 index = (H << 2) | (L << 1) | INSTR (20, 20);
6074 vm = INSTR (19, 16);
6075 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6076
6077 for (e = 0; e < (full ? 8 : 4); e ++)
6078 {
6079 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6080 product = element1 * element2;
6081 aarch64_set_vec_u16 (cpu, vd, e, product);
6082 }
6083 }
6084 break;
6085
6086 case 2:
6087 {
6088 /* 32 bit products. */
6089 uint32_t product;
6090 uint32_t element1;
6091 uint32_t element2;
6092
6093 index = (H << 1) | L;
6094 vm = INSTR (20, 16);
6095 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6096
6097 for (e = 0; e < (full ? 4 : 2); e ++)
6098 {
6099 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6100 product = element1 * element2;
6101 aarch64_set_vec_u32 (cpu, vd, e, product);
6102 }
6103 }
6104 break;
6105
6106 default:
6107 HALT_UNALLOC;
6108 }
6109 }
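
/* The element index is assembled differently per size: 16-bit lanes use
   H:L:M with a 4-bit register field (so only V0-V15 can be indexed),
   while 32-bit lanes use H:L with the full 5-bit Vm field.  */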
6110
6111 static void
6112 do_FMLA_by_element (sim_cpu *cpu)
6113 {
6114 /* instr[31] = 0
6115 instr[30] = half/full
6116 instr[29,23] = 00 1111 1
6117 instr[22] = size
6118 instr[21] = L
6119 instr[20,16] = m
6120 instr[15,12] = 0001
6121 instr[11] = H
6122 instr[10] = 0
6123 instr[9,5] = Vn
6124 instr[4,0] = Vd */
6125
6126 unsigned full = INSTR (30, 30);
6127 unsigned size = INSTR (22, 22);
6128 unsigned L = INSTR (21, 21);
6129 unsigned vm = INSTR (20, 16);
6130 unsigned H = INSTR (11, 11);
6131 unsigned vn = INSTR (9, 5);
6132 unsigned vd = INSTR (4, 0);
6133 unsigned e;
6134
6135 NYI_assert (29, 23, 0x1F);
6136 NYI_assert (15, 12, 0x1);
6137 NYI_assert (10, 10, 0);
6138
6139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6140 if (size)
6141 {
6142 double element1, element2;
6143
6144 if (! full || L)
6145 HALT_UNALLOC;
6146
6147 element2 = aarch64_get_vec_double (cpu, vm, H);
6148
6149 for (e = 0; e < 2; e++)
6150 {
6151 element1 = aarch64_get_vec_double (cpu, vn, e);
6152 element1 *= element2;
6153 element1 += aarch64_get_vec_double (cpu, vd, e);
6154 aarch64_set_vec_double (cpu, vd, e, element1);
6155 }
6156 }
6157 else
6158 {
6159 float element1;
6160 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6161
6162 for (e = 0; e < (full ? 4 : 2); e++)
6163 {
6164 element1 = aarch64_get_vec_float (cpu, vn, e);
6165 element1 *= element2;
6166 element1 += aarch64_get_vec_float (cpu, vd, e);
6167 aarch64_set_vec_float (cpu, vd, e, element1);
6168 }
6169 }
6170 }
6171
6172 static void
6173 do_vec_op2 (sim_cpu *cpu)
6174 {
6175 /* instr[31] = 0
6176 instr[30] = half/full
6177 instr[29,24] = 00 1111
6178 instr[23] = ?
6179 instr[22,16] = element size & index
6180 instr[15,10] = sub-opcode
6181 instr[9,5] = Vm
6182 instr[4,0] = Vd */
6183
6184 NYI_assert (29, 24, 0x0F);
6185
6186 if (INSTR (23, 23) != 0)
6187 {
6188 switch (INSTR (15, 10))
6189 {
6190 case 0x04:
6191 case 0x06:
6192 do_FMLA_by_element (cpu);
6193 return;
6194
6195 case 0x20:
6196 case 0x22:
6197 do_vec_MUL_by_element (cpu);
6198 return;
6199
6200 default:
6201 HALT_NYI;
6202 }
6203 }
6204 else
6205 {
6206 switch (INSTR (15, 10))
6207 {
6208 case 0x01: do_vec_SSHR_USHR (cpu); return;
6209 case 0x15: do_vec_SHL (cpu); return;
6210 case 0x20:
6211 case 0x22: do_vec_MUL_by_element (cpu); return;
6212 case 0x29: do_vec_xtl (cpu); return;
6213 default: HALT_NYI;
6214 }
6215 }
6216 }
6217
6218 static void
6219 do_vec_neg (sim_cpu *cpu)
6220 {
6221 /* instr[31] = 0
6222 instr[30] = full(1)/half(0)
6223 instr[29,24] = 10 1110
6224 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6225 instr[21,10] = 1000 0010 1110
6226 instr[9,5] = Vs
6227 instr[4,0] = Vd */
6228
6229 int full = INSTR (30, 30);
6230 unsigned vs = INSTR (9, 5);
6231 unsigned vd = INSTR (4, 0);
6232 unsigned i;
6233
6234 NYI_assert (29, 24, 0x2E);
6235 NYI_assert (21, 10, 0x82E);
6236
6237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6238 switch (INSTR (23, 22))
6239 {
6240 case 0:
6241 for (i = 0; i < (full ? 16 : 8); i++)
6242 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6243 return;
6244
6245 case 1:
6246 for (i = 0; i < (full ? 8 : 4); i++)
6247 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6248 return;
6249
6250 case 2:
6251 for (i = 0; i < (full ? 4 : 2); i++)
6252 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6253 return;
6254
6255 case 3:
6256 if (! full)
6257 HALT_NYI;
6258 for (i = 0; i < 2; i++)
6259 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6260 return;
6261 }
6262 }
6263
6264 static void
6265 do_vec_sqrt (sim_cpu *cpu)
6266 {
6267 /* instr[31] = 0
6268 instr[30] = full(1)/half(0)
6269 instr[29,23] = 101 1101
6270 instr[22] = single(0)/double(1)
6271 instr[21,10] = 1000 0111 1110
6272 instr[9,5] = Vs
6273 instr[4,0] = Vd. */
6274
6275 int full = INSTR (30, 30);
6276 unsigned vs = INSTR (9, 5);
6277 unsigned vd = INSTR (4, 0);
6278 unsigned i;
6279
  NYI_assert (29, 23, 0x5D);
6281 NYI_assert (21, 10, 0x87E);
6282
6283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6284 if (INSTR (22, 22) == 0)
6285 for (i = 0; i < (full ? 4 : 2); i++)
6286 aarch64_set_vec_float (cpu, vd, i,
6287 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6288 else
6289 for (i = 0; i < 2; i++)
6290 aarch64_set_vec_double (cpu, vd, i,
6291 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6292 }
6293
6294 static void
6295 do_vec_mls_indexed (sim_cpu *cpu)
6296 {
6297 /* instr[31] = 0
6298 instr[30] = half(0)/full(1)
6299 instr[29,24] = 10 1111
6300 instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11],instr[21,20] = index (if 16-bit)
     instr[11],instr[21] = index (if 32-bit)
6303 instr[20,16] = Vm
6304 instr[15,12] = 0100
6305 instr[11] = part of index
6306 instr[10] = 0
6307 instr[9,5] = Vs
6308 instr[4,0] = Vd. */
6309
6310 int full = INSTR (30, 30);
6311 unsigned vs = INSTR (9, 5);
6312 unsigned vd = INSTR (4, 0);
6313 unsigned vm = INSTR (20, 16);
6314 unsigned i;
6315
6316 NYI_assert (15, 12, 4);
6317 NYI_assert (10, 10, 0);
6318
6319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6320 switch (INSTR (23, 22))
6321 {
6322 case 1:
6323 {
	unsigned elem;
	uint16_t val;

	if (vm > 15)
	  HALT_NYI;

	/* The index is H:L:M; M (instr[20]) is zero here since vm <= 15.  */
	elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1);
	val = aarch64_get_vec_u16 (cpu, vm, elem);

	for (i = 0; i < (full ? 8 : 4); i++)
	  aarch64_set_vec_u16 (cpu, vd, i,
			       aarch64_get_vec_u16 (cpu, vd, i) -
			       (aarch64_get_vec_u16 (cpu, vs, i) * val));
	return;
6338 }
6339
6340 case 2:
6341 {
	/* The index is H:L.  */
	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_u32 (cpu, vd, i,
			       aarch64_get_vec_u32 (cpu, vd, i) -
			       (aarch64_get_vec_u32 (cpu, vs, i) * val));
6349 return;
6350 }
6351
6352 case 0:
6353 case 3:
6354 default:
6355 HALT_NYI;
6356 }
6357 }
6358
6359 static void
6360 do_vec_SUB (sim_cpu *cpu)
6361 {
6362 /* instr [31] = 0
6363 instr [30] = half(0)/full(1)
6364 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word (10), long (11)
6366 instr [21] = 1
6367 instr [20,16] = Vm
6368 instr [15,10] = 10 0001
6369 instr [9, 5] = Vn
6370 instr [4, 0] = Vd. */
6371
6372 unsigned full = INSTR (30, 30);
6373 unsigned vm = INSTR (20, 16);
6374 unsigned vn = INSTR (9, 5);
6375 unsigned vd = INSTR (4, 0);
6376 unsigned i;
6377
6378 NYI_assert (29, 24, 0x2E);
6379 NYI_assert (21, 21, 1);
6380 NYI_assert (15, 10, 0x21);
6381
6382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6383 switch (INSTR (23, 22))
6384 {
6385 case 0:
6386 for (i = 0; i < (full ? 16 : 8); i++)
6387 aarch64_set_vec_s8 (cpu, vd, i,
6388 aarch64_get_vec_s8 (cpu, vn, i)
6389 - aarch64_get_vec_s8 (cpu, vm, i));
6390 return;
6391
6392 case 1:
6393 for (i = 0; i < (full ? 8 : 4); i++)
6394 aarch64_set_vec_s16 (cpu, vd, i,
6395 aarch64_get_vec_s16 (cpu, vn, i)
6396 - aarch64_get_vec_s16 (cpu, vm, i));
6397 return;
6398
6399 case 2:
6400 for (i = 0; i < (full ? 4 : 2); i++)
6401 aarch64_set_vec_s32 (cpu, vd, i,
6402 aarch64_get_vec_s32 (cpu, vn, i)
6403 - aarch64_get_vec_s32 (cpu, vm, i));
6404 return;
6405
6406 case 3:
6407 if (full == 0)
6408 HALT_UNALLOC;
6409
6410 for (i = 0; i < 2; i++)
6411 aarch64_set_vec_s64 (cpu, vd, i,
6412 aarch64_get_vec_s64 (cpu, vn, i)
6413 - aarch64_get_vec_s64 (cpu, vm, i));
6414 return;
6415 }
6416 }
6417
6418 static void
6419 do_vec_MLS (sim_cpu *cpu)
6420 {
6421 /* instr [31] = 0
6422 instr [30] = half(0)/full(1)
6423 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word (10)
6425 instr [21] = 1
6426 instr [20,16] = Vm
6427 instr [15,10] = 10 0101
6428 instr [9, 5] = Vn
6429 instr [4, 0] = Vd. */
6430
6431 unsigned full = INSTR (30, 30);
6432 unsigned vm = INSTR (20, 16);
6433 unsigned vn = INSTR (9, 5);
6434 unsigned vd = INSTR (4, 0);
6435 unsigned i;
6436
6437 NYI_assert (29, 24, 0x2E);
6438 NYI_assert (21, 21, 1);
6439 NYI_assert (15, 10, 0x25);
6440
6441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6442 switch (INSTR (23, 22))
6443 {
6444 case 0:
6445 for (i = 0; i < (full ? 16 : 8); i++)
6446 aarch64_set_vec_u8 (cpu, vd, i,
6447 aarch64_get_vec_u8 (cpu, vd, i)
6448 - (aarch64_get_vec_u8 (cpu, vn, i)
6449 * aarch64_get_vec_u8 (cpu, vm, i)));
6450 return;
6451
6452 case 1:
6453 for (i = 0; i < (full ? 8 : 4); i++)
6454 aarch64_set_vec_u16 (cpu, vd, i,
6455 aarch64_get_vec_u16 (cpu, vd, i)
6456 - (aarch64_get_vec_u16 (cpu, vn, i)
6457 * aarch64_get_vec_u16 (cpu, vm, i)));
6458 return;
6459
6460 case 2:
6461 for (i = 0; i < (full ? 4 : 2); i++)
6462 aarch64_set_vec_u32 (cpu, vd, i,
6463 aarch64_get_vec_u32 (cpu, vd, i)
6464 - (aarch64_get_vec_u32 (cpu, vn, i)
6465 * aarch64_get_vec_u32 (cpu, vm, i)));
6466 return;
6467
6468 default:
6469 HALT_UNALLOC;
6470 }
6471 }
6472
6473 static void
6474 do_vec_FDIV (sim_cpu *cpu)
6475 {
6476 /* instr [31] = 0
6477 instr [30] = half(0)/full(1)
6478 instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
6480 instr [21] = 1
6481 instr [20,16] = Vm
6482 instr [15,10] = 1111 11
6483 instr [9, 5] = Vn
6484 instr [4, 0] = Vd. */
6485
6486 unsigned full = INSTR (30, 30);
6487 unsigned vm = INSTR (20, 16);
6488 unsigned vn = INSTR (9, 5);
6489 unsigned vd = INSTR (4, 0);
6490 unsigned i;
6491
6492 NYI_assert (29, 23, 0x5C);
6493 NYI_assert (21, 21, 1);
6494 NYI_assert (15, 10, 0x3F);
6495
6496 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6497 if (INSTR (22, 22))
6498 {
6499 if (! full)
6500 HALT_UNALLOC;
6501
6502 for (i = 0; i < 2; i++)
6503 aarch64_set_vec_double (cpu, vd, i,
6504 aarch64_get_vec_double (cpu, vn, i)
6505 / aarch64_get_vec_double (cpu, vm, i));
6506 }
6507 else
6508 for (i = 0; i < (full ? 4 : 2); i++)
6509 aarch64_set_vec_float (cpu, vd, i,
6510 aarch64_get_vec_float (cpu, vn, i)
6511 / aarch64_get_vec_float (cpu, vm, i));
6512 }
6513
6514 static void
6515 do_vec_FMUL (sim_cpu *cpu)
6516 {
6517 /* instr [31] = 0
6518 instr [30] = half(0)/full(1)
6519 instr [29,23] = 10 1110 0
6520 instr [22] = float(0)/double(1)
6521 instr [21] = 1
6522 instr [20,16] = Vm
6523 instr [15,10] = 1101 11
6524 instr [9, 5] = Vn
6525 instr [4, 0] = Vd. */
6526
6527 unsigned full = INSTR (30, 30);
6528 unsigned vm = INSTR (20, 16);
6529 unsigned vn = INSTR (9, 5);
6530 unsigned vd = INSTR (4, 0);
6531 unsigned i;
6532
6533 NYI_assert (29, 23, 0x5C);
6534 NYI_assert (21, 21, 1);
6535 NYI_assert (15, 10, 0x37);
6536
6537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6538 if (INSTR (22, 22))
6539 {
6540 if (! full)
6541 HALT_UNALLOC;
6542
6543 for (i = 0; i < 2; i++)
6544 aarch64_set_vec_double (cpu, vd, i,
6545 aarch64_get_vec_double (cpu, vn, i)
6546 * aarch64_get_vec_double (cpu, vm, i));
6547 }
6548 else
6549 for (i = 0; i < (full ? 4 : 2); i++)
6550 aarch64_set_vec_float (cpu, vd, i,
6551 aarch64_get_vec_float (cpu, vn, i)
6552 * aarch64_get_vec_float (cpu, vm, i));
6553 }
6554
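/* FADDP is a pairwise add: conceptually the source is the
   concatenation Vn:Vm and adjacent element pairs are summed, so for
   the full 4S form (a sketch of the behaviour implemented below):

     d[0] = n[0] + n[1];  d[1] = n[2] + n[3];
     d[2] = m[0] + m[1];  d[3] = m[2] + m[3];  */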
6555 static void
6556 do_vec_FADDP (sim_cpu *cpu)
6557 {
6558 /* instr [31] = 0
6559 instr [30] = half(0)/full(1)
6560 instr [29,23] = 10 1110 0
6561 instr [22] = float(0)/double(1)
6562 instr [21] = 1
6563 instr [20,16] = Vm
6564 instr [15,10] = 1101 01
6565 instr [9, 5] = Vn
6566 instr [4, 0] = Vd. */
6567
6568 unsigned full = INSTR (30, 30);
6569 unsigned vm = INSTR (20, 16);
6570 unsigned vn = INSTR (9, 5);
6571 unsigned vd = INSTR (4, 0);
6572
6573 NYI_assert (29, 23, 0x5C);
6574 NYI_assert (21, 21, 1);
6575 NYI_assert (15, 10, 0x35);
6576
6577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6578 if (INSTR (22, 22))
6579 {
      /* Extract values before adding them in case vd == vn/vm.  */
6581 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6582 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6583 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6584 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6585
6586 if (! full)
6587 HALT_UNALLOC;
6588
6589 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6590 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6591 }
6592 else
6593 {
      /* Extract values before adding them in case vd == vn/vm.  */
6595 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6596 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6597 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6598 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6599
6600 if (full)
6601 {
6602 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6603 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6604 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6605 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6606
6607 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6608 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6609 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6610 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6611 }
6612 else
6613 {
6614 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6615 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6616 }
6617 }
6618 }
6619
6620 static void
6621 do_vec_FSQRT (sim_cpu *cpu)
6622 {
6623 /* instr[31] = 0
6624 instr[30] = half(0)/full(1)
6625 instr[29,23] = 10 1110 1
6626 instr[22] = single(0)/double(1)
6627 instr[21,10] = 10 0001 1111 10
6628 instr[9,5] = Vsrc
6629 instr[4,0] = Vdest. */
6630
6631 unsigned vn = INSTR (9, 5);
6632 unsigned vd = INSTR (4, 0);
6633 unsigned full = INSTR (30, 30);
6634 int i;
6635
6636 NYI_assert (29, 23, 0x5D);
6637 NYI_assert (21, 10, 0x87E);
6638
6639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6640 if (INSTR (22, 22))
6641 {
6642 if (! full)
6643 HALT_UNALLOC;
6644
6645 for (i = 0; i < 2; i++)
6646 aarch64_set_vec_double (cpu, vd, i,
6647 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6648 }
6649 else
6650 {
6651 for (i = 0; i < (full ? 4 : 2); i++)
6652 aarch64_set_vec_float (cpu, vd, i,
6653 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6654 }
6655 }
6656
6657 static void
6658 do_vec_FNEG (sim_cpu *cpu)
6659 {
6660 /* instr[31] = 0
6661 instr[30] = half (0)/full (1)
6662 instr[29,23] = 10 1110 1
6663 instr[22] = single (0)/double (1)
6664 instr[21,10] = 10 0000 1111 10
6665 instr[9,5] = Vsrc
6666 instr[4,0] = Vdest. */
6667
6668 unsigned vn = INSTR (9, 5);
6669 unsigned vd = INSTR (4, 0);
6670 unsigned full = INSTR (30, 30);
6671 int i;
6672
6673 NYI_assert (29, 23, 0x5D);
6674 NYI_assert (21, 10, 0x83E);
6675
6676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6677 if (INSTR (22, 22))
6678 {
6679 if (! full)
6680 HALT_UNALLOC;
6681
6682 for (i = 0; i < 2; i++)
6683 aarch64_set_vec_double (cpu, vd, i,
6684 - aarch64_get_vec_double (cpu, vn, i));
6685 }
6686 else
6687 {
6688 for (i = 0; i < (full ? 4 : 2); i++)
6689 aarch64_set_vec_float (cpu, vd, i,
6690 - aarch64_get_vec_float (cpu, vn, i));
6691 }
6692 }
6693
6694 static void
6695 do_vec_NOT (sim_cpu *cpu)
6696 {
6697 /* instr[31] = 0
6698 instr[30] = half (0)/full (1)
6699 instr[29,10] = 10 1110 0010 0000 0101 10
6700 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6702
6703 unsigned vn = INSTR (9, 5);
6704 unsigned vd = INSTR (4, 0);
6705 unsigned i;
6706 int full = INSTR (30, 30);
6707
6708 NYI_assert (29, 10, 0xB8816);
6709
6710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6711 for (i = 0; i < (full ? 16 : 8); i++)
6712 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6713 }
6714
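/* Count the leading zero bits in the SIZE least significant bits of
   VAL, e.g. clz (0x00f0, 16) == 8; a VAL of zero yields SIZE.  Used
   by do_vec_CLZ below.  */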
6715 static unsigned int
6716 clz (uint64_t val, unsigned size)
6717 {
6718 uint64_t mask = 1;
6719 int count;
6720
6721 mask <<= (size - 1);
6722 count = 0;
6723 do
6724 {
6725 if (val & mask)
6726 break;
6727 mask >>= 1;
6728 count ++;
6729 }
6730 while (mask);
6731
6732 return count;
6733 }
6734
6735 static void
6736 do_vec_CLZ (sim_cpu *cpu)
6737 {
6738 /* instr[31] = 0
6739 instr[30] = half (0)/full (1)
6740 instr[29,24] = 10 1110
6741 instr[23,22] = size
6742 instr[21,10] = 10 0000 0100 10
6743 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6745
6746 unsigned vn = INSTR (9, 5);
6747 unsigned vd = INSTR (4, 0);
6748 unsigned i;
  int full = INSTR (30, 30);
6750
6751 NYI_assert (29, 24, 0x2E);
6752 NYI_assert (21, 10, 0x812);
6753
6754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6755 switch (INSTR (23, 22))
6756 {
6757 case 0:
6758 for (i = 0; i < (full ? 16 : 8); i++)
6759 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6760 break;
6761 case 1:
6762 for (i = 0; i < (full ? 8 : 4); i++)
6763 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6764 break;
6765 case 2:
6766 for (i = 0; i < (full ? 4 : 2); i++)
6767 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6768 break;
6769 case 3:
6770 if (! full)
6771 HALT_UNALLOC;
6772 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6773 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6774 break;
6775 }
6776 }
6777
6778 static void
6779 do_vec_MOV_element (sim_cpu *cpu)
6780 {
6781 /* instr[31,21] = 0110 1110 000
6782 instr[20,16] = size & dest index
6783 instr[15] = 0
6784 instr[14,11] = source index
6785 instr[10] = 1
6786 instr[9,5] = Vs
     instr[4,0]   = Vd.  */
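  /* The position of the lowest set bit in instr[20,16] selects the
     element size decoded below: xxxx1 ==> byte, xxx10 ==> half,
     xx100 ==> word, x1000 ==> doubleword; the bits above it form the
     destination index, e.g. imm5 = 01001 moves a byte to index 4.  */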
6788
6789 unsigned vs = INSTR (9, 5);
6790 unsigned vd = INSTR (4, 0);
6791 unsigned src_index;
6792 unsigned dst_index;
6793
6794 NYI_assert (31, 21, 0x370);
6795 NYI_assert (15, 15, 0);
6796 NYI_assert (10, 10, 1);
6797
6798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6799 if (INSTR (16, 16))
6800 {
6801 /* Move a byte. */
6802 src_index = INSTR (14, 11);
6803 dst_index = INSTR (20, 17);
6804 aarch64_set_vec_u8 (cpu, vd, dst_index,
6805 aarch64_get_vec_u8 (cpu, vs, src_index));
6806 }
6807 else if (INSTR (17, 17))
6808 {
6809 /* Move 16-bits. */
6810 NYI_assert (11, 11, 0);
6811 src_index = INSTR (14, 12);
6812 dst_index = INSTR (20, 18);
6813 aarch64_set_vec_u16 (cpu, vd, dst_index,
6814 aarch64_get_vec_u16 (cpu, vs, src_index));
6815 }
6816 else if (INSTR (18, 18))
6817 {
6818 /* Move 32-bits. */
6819 NYI_assert (12, 11, 0);
6820 src_index = INSTR (14, 13);
6821 dst_index = INSTR (20, 19);
6822 aarch64_set_vec_u32 (cpu, vd, dst_index,
6823 aarch64_get_vec_u32 (cpu, vs, src_index));
6824 }
6825 else
6826 {
6827 NYI_assert (19, 19, 1);
6828 NYI_assert (13, 11, 0);
6829 src_index = INSTR (14, 14);
6830 dst_index = INSTR (20, 20);
6831 aarch64_set_vec_u64 (cpu, vd, dst_index,
6832 aarch64_get_vec_u64 (cpu, vs, src_index));
6833 }
6834 }
6835
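/* REV32: the index XOR below reverses element order within each
   32-bit container, e.g. byte indices 0,1,2,3 map to 3,2,1,0 and
   halfword indices 0,1 map to 1,0.  */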
6836 static void
6837 do_vec_REV32 (sim_cpu *cpu)
6838 {
6839 /* instr[31] = 0
6840 instr[30] = full/half
6841 instr[29,24] = 10 1110
6842 instr[23,22] = size
6843 instr[21,10] = 10 0000 0000 10
6844 instr[9,5] = Rn
6845 instr[4,0] = Rd. */
6846
6847 unsigned rn = INSTR (9, 5);
6848 unsigned rd = INSTR (4, 0);
6849 unsigned size = INSTR (23, 22);
6850 unsigned full = INSTR (30, 30);
6851 unsigned i;
6852 FRegister val;
6853
6854 NYI_assert (29, 24, 0x2E);
6855 NYI_assert (21, 10, 0x802);
6856
6857 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6858 switch (size)
6859 {
6860 case 0:
6861 for (i = 0; i < (full ? 16 : 8); i++)
6862 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6863 break;
6864
6865 case 1:
6866 for (i = 0; i < (full ? 8 : 4); i++)
6867 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6868 break;
6869
6870 default:
6871 HALT_UNALLOC;
6872 }
6873
6874 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6875 if (full)
6876 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6877 }
6878
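/* EXT: extract a vector from the byte-wise concatenation Vn:Vm.
   E.g. for the full form with index 3 the result is bytes 3..15 of
   Vn followed by bytes 0..2 of Vm.  */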
6879 static void
6880 do_vec_EXT (sim_cpu *cpu)
6881 {
6882 /* instr[31] = 0
6883 instr[30] = full/half
6884 instr[29,21] = 10 1110 000
6885 instr[20,16] = Vm
6886 instr[15] = 0
6887 instr[14,11] = source index
6888 instr[10] = 0
6889 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6891
6892 unsigned vm = INSTR (20, 16);
6893 unsigned vn = INSTR (9, 5);
6894 unsigned vd = INSTR (4, 0);
6895 unsigned src_index = INSTR (14, 11);
6896 unsigned full = INSTR (30, 30);
6897 unsigned i;
6898 unsigned j;
6899 FRegister val;
6900
6901 NYI_assert (31, 21, 0x370);
6902 NYI_assert (15, 15, 0);
6903 NYI_assert (10, 10, 0);
6904
6905 if (!full && (src_index & 0x8))
6906 HALT_UNALLOC;
6907
6908 j = 0;
6909
6910 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6911 for (i = src_index; i < (full ? 16 : 8); i++)
6912 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6913 for (i = 0; i < src_index; i++)
6914 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6915
6916 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6917 if (full)
6918 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6919 }
6920
6921 static void
6922 dexAdvSIMD0 (sim_cpu *cpu)
6923 {
6924 /* instr [28,25] = 0 111. */
6925 if ( INSTR (15, 10) == 0x07
6926 && (INSTR (9, 5) ==
6927 INSTR (20, 16)))
6928 {
6929 if (INSTR (31, 21) == 0x075
6930 || INSTR (31, 21) == 0x275)
6931 {
6932 do_vec_MOV_whole_vector (cpu);
6933 return;
6934 }
6935 }
6936
6937 if (INSTR (29, 19) == 0x1E0)
6938 {
6939 do_vec_MOV_immediate (cpu);
6940 return;
6941 }
6942
6943 if (INSTR (29, 19) == 0x5E0)
6944 {
6945 do_vec_MVNI (cpu);
6946 return;
6947 }
6948
6949 if (INSTR (29, 19) == 0x1C0
6950 || INSTR (29, 19) == 0x1C1)
6951 {
6952 if (INSTR (15, 10) == 0x03)
6953 {
6954 do_vec_DUP_scalar_into_vector (cpu);
6955 return;
6956 }
6957 }
6958
6959 switch (INSTR (29, 24))
6960 {
6961 case 0x0E: do_vec_op1 (cpu); return;
6962 case 0x0F: do_vec_op2 (cpu); return;
6963
6964 case 0x2E:
6965 if (INSTR (21, 21) == 1)
6966 {
6967 switch (INSTR (15, 10))
6968 {
6969 case 0x02:
6970 do_vec_REV32 (cpu);
6971 return;
6972
6973 case 0x07:
6974 switch (INSTR (23, 22))
6975 {
6976 case 0: do_vec_EOR (cpu); return;
6977 case 1: do_vec_BSL (cpu); return;
6978 case 2:
6979 case 3: do_vec_bit (cpu); return;
6980 }
6981 break;
6982
6983 case 0x08: do_vec_sub_long (cpu); return;
6984 case 0x11: do_vec_USHL (cpu); return;
6985 case 0x12: do_vec_CLZ (cpu); return;
6986 case 0x16: do_vec_NOT (cpu); return;
6987 case 0x19: do_vec_max (cpu); return;
6988 case 0x1B: do_vec_min (cpu); return;
6989 case 0x21: do_vec_SUB (cpu); return;
6990 case 0x25: do_vec_MLS (cpu); return;
6991 case 0x31: do_vec_FminmaxNMP (cpu); return;
6992 case 0x35: do_vec_FADDP (cpu); return;
6993 case 0x37: do_vec_FMUL (cpu); return;
6994 case 0x3F: do_vec_FDIV (cpu); return;
6995
6996 case 0x3E:
6997 switch (INSTR (20, 16))
6998 {
6999 case 0x00: do_vec_FNEG (cpu); return;
7000 case 0x01: do_vec_FSQRT (cpu); return;
7001 default: HALT_NYI;
7002 }
7003
7004 case 0x0D:
7005 case 0x0F:
7006 case 0x22:
7007 case 0x23:
7008 case 0x26:
7009 case 0x2A:
7010 case 0x32:
7011 case 0x36:
7012 case 0x39:
7013 case 0x3A:
7014 do_vec_compare (cpu); return;
7015
7016 default:
7017 break;
7018 }
7019 }
7020
7021 if (INSTR (31, 21) == 0x370)
7022 {
7023 if (INSTR (10, 10))
7024 do_vec_MOV_element (cpu);
7025 else
7026 do_vec_EXT (cpu);
7027 return;
7028 }
7029
7030 switch (INSTR (21, 10))
7031 {
7032 case 0x82E: do_vec_neg (cpu); return;
7033 case 0x87E: do_vec_sqrt (cpu); return;
7034 default:
7035 if (INSTR (15, 10) == 0x30)
7036 {
7037 do_vec_mull (cpu);
7038 return;
7039 }
7040 break;
7041 }
7042 break;
7043
7044 case 0x2f:
7045 switch (INSTR (15, 10))
7046 {
7047 case 0x01: do_vec_SSHR_USHR (cpu); return;
7048 case 0x10:
7049 case 0x12: do_vec_mls_indexed (cpu); return;
7050 case 0x29: do_vec_xtl (cpu); return;
7051 default:
7052 HALT_NYI;
7053 }
7054
7055 default:
7056 break;
7057 }
7058
7059 HALT_NYI;
7060 }
7061
7062 /* 3 sources. */
7063
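/* Sign conventions for the four fused multiply variants below,
   matching the usual AArch64 definitions:
     FMADD:  d = a + n * m      FMSUB:  d = a - n * m
     FNMADD: d = -a - n * m     FNMSUB: d = -a + n * m  */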
7064 /* Float multiply add. */
7065 static void
7066 fmadds (sim_cpu *cpu)
7067 {
7068 unsigned sa = INSTR (14, 10);
7069 unsigned sm = INSTR (20, 16);
7070 unsigned sn = INSTR ( 9, 5);
7071 unsigned sd = INSTR ( 4, 0);
7072
7073 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7074 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7075 + aarch64_get_FP_float (cpu, sn)
7076 * aarch64_get_FP_float (cpu, sm));
7077 }
7078
7079 /* Double multiply add. */
7080 static void
7081 fmaddd (sim_cpu *cpu)
7082 {
7083 unsigned sa = INSTR (14, 10);
7084 unsigned sm = INSTR (20, 16);
7085 unsigned sn = INSTR ( 9, 5);
7086 unsigned sd = INSTR ( 4, 0);
7087
7088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7089 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7090 + aarch64_get_FP_double (cpu, sn)
7091 * aarch64_get_FP_double (cpu, sm));
7092 }
7093
7094 /* Float multiply subtract. */
7095 static void
7096 fmsubs (sim_cpu *cpu)
7097 {
7098 unsigned sa = INSTR (14, 10);
7099 unsigned sm = INSTR (20, 16);
7100 unsigned sn = INSTR ( 9, 5);
7101 unsigned sd = INSTR ( 4, 0);
7102
7103 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7104 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7105 - aarch64_get_FP_float (cpu, sn)
7106 * aarch64_get_FP_float (cpu, sm));
7107 }
7108
7109 /* Double multiply subtract. */
7110 static void
7111 fmsubd (sim_cpu *cpu)
7112 {
7113 unsigned sa = INSTR (14, 10);
7114 unsigned sm = INSTR (20, 16);
7115 unsigned sn = INSTR ( 9, 5);
7116 unsigned sd = INSTR ( 4, 0);
7117
7118 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7119 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7120 - aarch64_get_FP_double (cpu, sn)
7121 * aarch64_get_FP_double (cpu, sm));
7122 }
7123
7124 /* Float negative multiply add. */
7125 static void
7126 fnmadds (sim_cpu *cpu)
7127 {
7128 unsigned sa = INSTR (14, 10);
7129 unsigned sm = INSTR (20, 16);
7130 unsigned sn = INSTR ( 9, 5);
7131 unsigned sd = INSTR ( 4, 0);
7132
7133 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7134 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7135 + (- aarch64_get_FP_float (cpu, sn))
7136 * aarch64_get_FP_float (cpu, sm));
7137 }
7138
7139 /* Double negative multiply add. */
7140 static void
7141 fnmaddd (sim_cpu *cpu)
7142 {
7143 unsigned sa = INSTR (14, 10);
7144 unsigned sm = INSTR (20, 16);
7145 unsigned sn = INSTR ( 9, 5);
7146 unsigned sd = INSTR ( 4, 0);
7147
7148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7149 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7150 + (- aarch64_get_FP_double (cpu, sn))
7151 * aarch64_get_FP_double (cpu, sm));
7152 }
7153
7154 /* Float negative multiply subtract. */
7155 static void
7156 fnmsubs (sim_cpu *cpu)
7157 {
7158 unsigned sa = INSTR (14, 10);
7159 unsigned sm = INSTR (20, 16);
7160 unsigned sn = INSTR ( 9, 5);
7161 unsigned sd = INSTR ( 4, 0);
7162
7163 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7164 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7165 + aarch64_get_FP_float (cpu, sn)
7166 * aarch64_get_FP_float (cpu, sm));
7167 }
7168
7169 /* Double negative multiply subtract. */
7170 static void
7171 fnmsubd (sim_cpu *cpu)
7172 {
7173 unsigned sa = INSTR (14, 10);
7174 unsigned sm = INSTR (20, 16);
7175 unsigned sn = INSTR ( 9, 5);
7176 unsigned sd = INSTR ( 4, 0);
7177
7178 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7179 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7180 + aarch64_get_FP_double (cpu, sn)
7181 * aarch64_get_FP_double (cpu, sm));
7182 }
7183
7184 static void
7185 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7186 {
7187 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7188 instr[30] = 0
7189 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7190 instr[28,25] = 1111
7191 instr[24] = 1
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7193 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7194 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7195
7196 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7197 /* dispatch on combined type:o1:o2. */
7198 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7199
7200 if (M_S != 0)
7201 HALT_UNALLOC;
7202
7203 switch (dispatch)
7204 {
7205 case 0: fmadds (cpu); return;
7206 case 1: fmsubs (cpu); return;
7207 case 2: fnmadds (cpu); return;
7208 case 3: fnmsubs (cpu); return;
7209 case 4: fmaddd (cpu); return;
7210 case 5: fmsubd (cpu); return;
7211 case 6: fnmaddd (cpu); return;
7212 case 7: fnmsubd (cpu); return;
7213 default:
7214 /* type > 1 is currently unallocated. */
7215 HALT_UNALLOC;
7216 }
7217 }
7218
7219 static void
7220 dexSimpleFPFixedConvert (sim_cpu *cpu)
7221 {
7222 HALT_NYI;
7223 }
7224
7225 static void
7226 dexSimpleFPCondCompare (sim_cpu *cpu)
7227 {
7228 /* instr [31,23] = 0001 1110 0
7229 instr [22] = type
7230 instr [21] = 1
7231 instr [20,16] = Rm
7232 instr [15,12] = condition
7233 instr [11,10] = 01
7234 instr [9,5] = Rn
7235 instr [4] = 0
7236 instr [3,0] = nzcv */
7237
7238 unsigned rm = INSTR (20, 16);
7239 unsigned rn = INSTR (9, 5);
7240
7241 NYI_assert (31, 23, 0x3C);
7242 NYI_assert (11, 10, 0x1);
7243 NYI_assert (4, 4, 0);
7244
7245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7246 if (! testConditionCode (cpu, INSTR (15, 12)))
7247 {
7248 aarch64_set_CPSR (cpu, INSTR (3, 0));
7249 return;
7250 }
7251
7252 if (INSTR (22, 22))
7253 {
7254 /* Double precision. */
7255 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7256 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7257
7258 /* FIXME: Check for NaNs. */
7259 if (val1 == val2)
7260 aarch64_set_CPSR (cpu, (Z | C));
7261 else if (val1 < val2)
7262 aarch64_set_CPSR (cpu, N);
7263 else /* val1 > val2 */
7264 aarch64_set_CPSR (cpu, C);
7265 }
7266 else
7267 {
7268 /* Single precision. */
7269 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7270 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7271
7272 /* FIXME: Check for NaNs. */
7273 if (val1 == val2)
7274 aarch64_set_CPSR (cpu, (Z | C));
7275 else if (val1 < val2)
7276 aarch64_set_CPSR (cpu, N);
7277 else /* val1 > val2 */
7278 aarch64_set_CPSR (cpu, C);
7279 }
7280 }
7281
7282 /* 2 sources. */
7283
7284 /* Float add. */
7285 static void
7286 fadds (sim_cpu *cpu)
7287 {
7288 unsigned sm = INSTR (20, 16);
7289 unsigned sn = INSTR ( 9, 5);
7290 unsigned sd = INSTR ( 4, 0);
7291
7292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7293 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7294 + aarch64_get_FP_float (cpu, sm));
7295 }
7296
7297 /* Double add. */
7298 static void
7299 faddd (sim_cpu *cpu)
7300 {
7301 unsigned sm = INSTR (20, 16);
7302 unsigned sn = INSTR ( 9, 5);
7303 unsigned sd = INSTR ( 4, 0);
7304
7305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7306 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7307 + aarch64_get_FP_double (cpu, sm));
7308 }
7309
7310 /* Float divide. */
7311 static void
7312 fdivs (sim_cpu *cpu)
7313 {
7314 unsigned sm = INSTR (20, 16);
7315 unsigned sn = INSTR ( 9, 5);
7316 unsigned sd = INSTR ( 4, 0);
7317
7318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7319 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7320 / aarch64_get_FP_float (cpu, sm));
7321 }
7322
7323 /* Double divide. */
7324 static void
7325 fdivd (sim_cpu *cpu)
7326 {
7327 unsigned sm = INSTR (20, 16);
7328 unsigned sn = INSTR ( 9, 5);
7329 unsigned sd = INSTR ( 4, 0);
7330
7331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7332 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7333 / aarch64_get_FP_double (cpu, sm));
7334 }
7335
7336 /* Float multiply. */
7337 static void
7338 fmuls (sim_cpu *cpu)
7339 {
7340 unsigned sm = INSTR (20, 16);
7341 unsigned sn = INSTR ( 9, 5);
7342 unsigned sd = INSTR ( 4, 0);
7343
7344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7345 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7346 * aarch64_get_FP_float (cpu, sm));
7347 }
7348
7349 /* Double multiply. */
7350 static void
7351 fmuld (sim_cpu *cpu)
7352 {
7353 unsigned sm = INSTR (20, 16);
7354 unsigned sn = INSTR ( 9, 5);
7355 unsigned sd = INSTR ( 4, 0);
7356
7357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7358 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7359 * aarch64_get_FP_double (cpu, sm));
7360 }
7361
7362 /* Float negate and multiply. */
7363 static void
7364 fnmuls (sim_cpu *cpu)
7365 {
7366 unsigned sm = INSTR (20, 16);
7367 unsigned sn = INSTR ( 9, 5);
7368 unsigned sd = INSTR ( 4, 0);
7369
7370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7371 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7372 * aarch64_get_FP_float (cpu, sm)));
7373 }
7374
7375 /* Double negate and multiply. */
7376 static void
7377 fnmuld (sim_cpu *cpu)
7378 {
7379 unsigned sm = INSTR (20, 16);
7380 unsigned sn = INSTR ( 9, 5);
7381 unsigned sd = INSTR ( 4, 0);
7382
7383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7384 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7385 * aarch64_get_FP_double (cpu, sm)));
7386 }
7387
7388 /* Float subtract. */
7389 static void
7390 fsubs (sim_cpu *cpu)
7391 {
7392 unsigned sm = INSTR (20, 16);
7393 unsigned sn = INSTR ( 9, 5);
7394 unsigned sd = INSTR ( 4, 0);
7395
7396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7397 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7398 - aarch64_get_FP_float (cpu, sm));
7399 }
7400
7401 /* Double subtract. */
7402 static void
7403 fsubd (sim_cpu *cpu)
7404 {
7405 unsigned sm = INSTR (20, 16);
7406 unsigned sn = INSTR ( 9, 5);
7407 unsigned sd = INSTR ( 4, 0);
7408
7409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7410 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7411 - aarch64_get_FP_double (cpu, sm));
7412 }
7413
7414 static void
7415 do_FMINNM (sim_cpu *cpu)
7416 {
7417 /* instr[31,23] = 0 0011 1100
7418 instr[22] = float(0)/double(1)
7419 instr[21] = 1
7420 instr[20,16] = Sm
7421 instr[15,10] = 01 1110
7422 instr[9,5] = Sn
     instr[4,0]   = Sd */
7424
7425 unsigned sm = INSTR (20, 16);
7426 unsigned sn = INSTR ( 9, 5);
7427 unsigned sd = INSTR ( 4, 0);
7428
7429 NYI_assert (31, 23, 0x03C);
7430 NYI_assert (15, 10, 0x1E);
7431
7432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7433 if (INSTR (22, 22))
7434 aarch64_set_FP_double (cpu, sd,
7435 dminnm (aarch64_get_FP_double (cpu, sn),
7436 aarch64_get_FP_double (cpu, sm)));
7437 else
7438 aarch64_set_FP_float (cpu, sd,
7439 fminnm (aarch64_get_FP_float (cpu, sn),
7440 aarch64_get_FP_float (cpu, sm)));
7441 }
7442
7443 static void
7444 do_FMAXNM (sim_cpu *cpu)
7445 {
7446 /* instr[31,23] = 0 0011 1100
7447 instr[22] = float(0)/double(1)
7448 instr[21] = 1
7449 instr[20,16] = Sm
7450 instr[15,10] = 01 1010
7451 instr[9,5] = Sn
     instr[4,0]   = Sd */
7453
7454 unsigned sm = INSTR (20, 16);
7455 unsigned sn = INSTR ( 9, 5);
7456 unsigned sd = INSTR ( 4, 0);
7457
7458 NYI_assert (31, 23, 0x03C);
7459 NYI_assert (15, 10, 0x1A);
7460
7461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7462 if (INSTR (22, 22))
7463 aarch64_set_FP_double (cpu, sd,
7464 dmaxnm (aarch64_get_FP_double (cpu, sn),
7465 aarch64_get_FP_double (cpu, sm)));
7466 else
7467 aarch64_set_FP_float (cpu, sd,
7468 fmaxnm (aarch64_get_FP_float (cpu, sn),
7469 aarch64_get_FP_float (cpu, sm)));
7470 }
7471
7472 static void
7473 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7474 {
7475 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7476 instr[30] = 0
7477 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7478 instr[28,25] = 1111
7479 instr[24] = 0
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7481 instr[21] = 1
7482 instr[20,16] = Vm
7483 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7484 0010 ==> FADD, 0011 ==> FSUB,
7485 0100 ==> FMAX, 0101 ==> FMIN
7486 0110 ==> FMAXNM, 0111 ==> FMINNM
7487 1000 ==> FNMUL, ow ==> UNALLOC
7488 instr[11,10] = 10
7489 instr[9,5] = Vn
7490 instr[4,0] = Vd */
7491
7492 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7493 uint32_t type = INSTR (23, 22);
7494 /* Dispatch on opcode. */
7495 uint32_t dispatch = INSTR (15, 12);
7496
7497 if (type > 1)
7498 HALT_UNALLOC;
7499
7500 if (M_S != 0)
7501 HALT_UNALLOC;
7502
7503 if (type)
7504 switch (dispatch)
7505 {
7506 case 0: fmuld (cpu); return;
7507 case 1: fdivd (cpu); return;
7508 case 2: faddd (cpu); return;
7509 case 3: fsubd (cpu); return;
7510 case 6: do_FMAXNM (cpu); return;
7511 case 7: do_FMINNM (cpu); return;
7512 case 8: fnmuld (cpu); return;
7513
7514 /* Have not yet implemented fmax and fmin. */
7515 case 4:
7516 case 5:
7517 HALT_NYI;
7518
7519 default:
7520 HALT_UNALLOC;
7521 }
7522 else /* type == 0 => floats. */
7523 switch (dispatch)
7524 {
7525 case 0: fmuls (cpu); return;
7526 case 1: fdivs (cpu); return;
7527 case 2: fadds (cpu); return;
7528 case 3: fsubs (cpu); return;
7529 case 6: do_FMAXNM (cpu); return;
7530 case 7: do_FMINNM (cpu); return;
7531 case 8: fnmuls (cpu); return;
7532
7533 case 4:
7534 case 5:
7535 HALT_NYI;
7536
7537 default:
7538 HALT_UNALLOC;
7539 }
7540 }
7541
7542 static void
7543 dexSimpleFPCondSelect (sim_cpu *cpu)
7544 {
7545 /* FCSEL
7546 instr[31,23] = 0 0011 1100
7547 instr[22] = 0=>single 1=>double
7548 instr[21] = 1
7549 instr[20,16] = Sm
7550 instr[15,12] = cond
7551 instr[11,10] = 11
7552 instr[9,5] = Sn
     instr[4,0]   = Sd */
7554 unsigned sm = INSTR (20, 16);
7555 unsigned sn = INSTR ( 9, 5);
7556 unsigned sd = INSTR ( 4, 0);
7557 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7558
7559 NYI_assert (31, 23, 0x03C);
7560 NYI_assert (11, 10, 0x3);
7561
7562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7563 if (INSTR (22, 22))
7564 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7565 : aarch64_get_FP_double (cpu, sm)));
7566 else
7567 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7568 : aarch64_get_FP_float (cpu, sm)));
7569 }
7570
7571 /* Store 32 bit unscaled signed 9 bit. */
7572 static void
7573 fsturs (sim_cpu *cpu, int32_t offset)
7574 {
7575 unsigned int rn = INSTR (9, 5);
7576 unsigned int st = INSTR (4, 0);
7577
7578 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7580 aarch64_get_vec_u32 (cpu, st, 0));
7581 }
7582
7583 /* Store 64 bit unscaled signed 9 bit. */
7584 static void
7585 fsturd (sim_cpu *cpu, int32_t offset)
7586 {
7587 unsigned int rn = INSTR (9, 5);
7588 unsigned int st = INSTR (4, 0);
7589
7590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7592 aarch64_get_vec_u64 (cpu, st, 0));
7593 }
7594
7595 /* Store 128 bit unscaled signed 9 bit. */
7596 static void
7597 fsturq (sim_cpu *cpu, int32_t offset)
7598 {
7599 unsigned int rn = INSTR (9, 5);
7600 unsigned int st = INSTR (4, 0);
7601 FRegister a;
7602
7603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7604 aarch64_get_FP_long_double (cpu, st, & a);
7605 aarch64_set_mem_long_double (cpu,
			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7607 + offset, a);
7608 }
7609
7610 /* TODO FP move register. */
7611
7612 /* 32 bit fp to fp move register. */
7613 static void
7614 ffmovs (sim_cpu *cpu)
7615 {
7616 unsigned int rn = INSTR (9, 5);
7617 unsigned int st = INSTR (4, 0);
7618
7619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7620 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7621 }
7622
7623 /* 64 bit fp to fp move register. */
7624 static void
7625 ffmovd (sim_cpu *cpu)
7626 {
7627 unsigned int rn = INSTR (9, 5);
7628 unsigned int st = INSTR (4, 0);
7629
7630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7631 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7632 }
7633
7634 /* 32 bit GReg to Vec move register. */
7635 static void
7636 fgmovs (sim_cpu *cpu)
7637 {
7638 unsigned int rn = INSTR (9, 5);
7639 unsigned int st = INSTR (4, 0);
7640
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7643 }
7644
7645 /* 64 bit g to fp move register. */
7646 static void
7647 fgmovd (sim_cpu *cpu)
7648 {
7649 unsigned int rn = INSTR (9, 5);
7650 unsigned int st = INSTR (4, 0);
7651
7652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7653 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7654 }
7655
7656 /* 32 bit fp to g move register. */
7657 static void
7658 gfmovs (sim_cpu *cpu)
7659 {
7660 unsigned int rn = INSTR (9, 5);
7661 unsigned int st = INSTR (4, 0);
7662
7663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7664 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7665 }
7666
7667 /* 64 bit fp to g move register. */
7668 static void
7669 gfmovd (sim_cpu *cpu)
7670 {
7671 unsigned int rn = INSTR (9, 5);
7672 unsigned int st = INSTR (4, 0);
7673
7674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7675 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7676 }
7677
7678 /* FP move immediate
7679
7680 These install an immediate 8 bit value in the target register
7681 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7682 bit exponent. */
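/* A sketch of that expansion, assuming the standard VFPExpandImm
   mapping (fp_immediate_for_encoding_32/_64 are expected to
   implement the same rule).  With imm8 = s:b:c:d:efgh

     value = (-1)^s * (1 + efgh/16) * 2^(UInt (NOT (b):c:d) - 3)

   so for example imm8 == 0x70 encodes 1.0 and imm8 == 0x00
   encodes 2.0.  */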
7683
7684 static void
7685 fmovs (sim_cpu *cpu)
7686 {
7687 unsigned int sd = INSTR (4, 0);
7688 uint32_t imm = INSTR (20, 13);
7689 float f = fp_immediate_for_encoding_32 (imm);
7690
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7692 aarch64_set_FP_float (cpu, sd, f);
7693 }
7694
7695 static void
7696 fmovd (sim_cpu *cpu)
7697 {
7698 unsigned int sd = INSTR (4, 0);
7699 uint32_t imm = INSTR (20, 13);
7700 double d = fp_immediate_for_encoding_64 (imm);
7701
7702 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7703 aarch64_set_FP_double (cpu, sd, d);
7704 }
7705
7706 static void
7707 dexSimpleFPImmediate (sim_cpu *cpu)
7708 {
  /* instr[31,23] == 0001 1110 0
7710 instr[22] == type : single(0)/double(1)
7711 instr[21] == 1
7712 instr[20,13] == imm8
7713 instr[12,10] == 100
     instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7715 instr[4,0] == Rd */
7716 uint32_t imm5 = INSTR (9, 5);
7717
7718 NYI_assert (31, 23, 0x3C);
7719
7720 if (imm5 != 0)
7721 HALT_UNALLOC;
7722
7723 if (INSTR (22, 22))
7724 fmovd (cpu);
7725 else
7726 fmovs (cpu);
7727 }
7728
7729 /* TODO specific decode and execute for group Load Store. */
7730
7731 /* TODO FP load/store single register (unscaled offset). */
7732
7733 /* TODO load 8 bit unscaled signed 9 bit. */
7734 /* TODO load 16 bit unscaled signed 9 bit. */
7735
7736 /* Load 32 bit unscaled signed 9 bit. */
7737 static void
7738 fldurs (sim_cpu *cpu, int32_t offset)
7739 {
7740 unsigned int rn = INSTR (9, 5);
7741 unsigned int st = INSTR (4, 0);
7742
7743 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7744 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7745 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7746 }
7747
7748 /* Load 64 bit unscaled signed 9 bit. */
7749 static void
7750 fldurd (sim_cpu *cpu, int32_t offset)
7751 {
7752 unsigned int rn = INSTR (9, 5);
7753 unsigned int st = INSTR (4, 0);
7754
7755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7756 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7757 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7758 }
7759
7760 /* Load 128 bit unscaled signed 9 bit. */
7761 static void
7762 fldurq (sim_cpu *cpu, int32_t offset)
7763 {
7764 unsigned int rn = INSTR (9, 5);
7765 unsigned int st = INSTR (4, 0);
7766 FRegister a;
7767 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7768
7769 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7770 aarch64_get_mem_long_double (cpu, addr, & a);
7771 aarch64_set_FP_long_double (cpu, st, a);
7772 }
7773
7774 /* TODO store 8 bit unscaled signed 9 bit. */
7775 /* TODO store 16 bit unscaled signed 9 bit. */
7776
7777
7778 /* 1 source. */
7779
7780 /* Float absolute value. */
7781 static void
7782 fabss (sim_cpu *cpu)
7783 {
7784 unsigned sn = INSTR (9, 5);
7785 unsigned sd = INSTR (4, 0);
7786 float value = aarch64_get_FP_float (cpu, sn);
7787
7788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7789 aarch64_set_FP_float (cpu, sd, fabsf (value));
7790 }
7791
7792 /* Double absolute value. */
7793 static void
7794 fabcpu (sim_cpu *cpu)
7795 {
7796 unsigned sn = INSTR (9, 5);
7797 unsigned sd = INSTR (4, 0);
7798 double value = aarch64_get_FP_double (cpu, sn);
7799
7800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7801 aarch64_set_FP_double (cpu, sd, fabs (value));
7802 }
7803
7804 /* Float negative value. */
7805 static void
7806 fnegs (sim_cpu *cpu)
7807 {
7808 unsigned sn = INSTR (9, 5);
7809 unsigned sd = INSTR (4, 0);
7810
7811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7812 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7813 }
7814
7815 /* Double negative value. */
7816 static void
7817 fnegd (sim_cpu *cpu)
7818 {
7819 unsigned sn = INSTR (9, 5);
7820 unsigned sd = INSTR (4, 0);
7821
7822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7823 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7824 }
7825
7826 /* Float square root. */
7827 static void
7828 fsqrts (sim_cpu *cpu)
7829 {
7830 unsigned sn = INSTR (9, 5);
7831 unsigned sd = INSTR (4, 0);
7832
7833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7834 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7835 }
7836
7837 /* Double square root. */
7838 static void
7839 fsqrtd (sim_cpu *cpu)
7840 {
7841 unsigned sn = INSTR (9, 5);
7842 unsigned sd = INSTR (4, 0);
7843
7844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7845 aarch64_set_FP_double (cpu, sd,
7846 sqrt (aarch64_get_FP_double (cpu, sn)));
7847 }
7848
7849 /* Convert double to float. */
7850 static void
7851 fcvtds (sim_cpu *cpu)
7852 {
7853 unsigned sn = INSTR (9, 5);
7854 unsigned sd = INSTR (4, 0);
7855
7856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7857 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7858 }
7859
7860 /* Convert float to double. */
7861 static void
7862 fcvtcpu (sim_cpu *cpu)
7863 {
7864 unsigned sn = INSTR (9, 5);
7865 unsigned sd = INSTR (4, 0);
7866
7867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7868 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7869 }
7870
7871 static void
7872 do_FRINT (sim_cpu *cpu)
7873 {
7874 /* instr[31,23] = 0001 1110 0
7875 instr[22] = single(0)/double(1)
7876 instr[21,18] = 1001
7877 instr[17,15] = rounding mode
7878 instr[14,10] = 10000
7879 instr[9,5] = source
7880 instr[4,0] = dest */
7881
7882 float val;
7883 unsigned rs = INSTR (9, 5);
7884 unsigned rd = INSTR (4, 0);
7885 unsigned int rmode = INSTR (17, 15);
7886
7887 NYI_assert (31, 23, 0x03C);
7888 NYI_assert (21, 18, 0x9);
7889 NYI_assert (14, 10, 0x10);
7890
7891 if (rmode == 6 || rmode == 7)
7892 /* FIXME: Add support for rmode == 6 exactness check. */
7893 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7894
7895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7896 if (INSTR (22, 22))
7897 {
7898 double val = aarch64_get_FP_double (cpu, rs);
7899
7900 switch (rmode)
7901 {
	case 0: /* mode N: nearest, ties to even.  */
	  {
	    double rval = round (val);

	    /* round () breaks ties away from zero; steer halfway cases
	       to the even neighbour instead, e.g. 2.5 -> 2.0 and
	       -2.5 -> -2.0.  */
	    if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
	      rval += (val > rval) ? 1.0 : -1.0;

	    aarch64_set_FP_double (cpu, rd, rval);
	    return;
	  }
7915
7916 case 1: /* mode P: towards +inf. */
7917 if (val < 0.0)
7918 aarch64_set_FP_double (cpu, rd, trunc (val));
7919 else
7920 aarch64_set_FP_double (cpu, rd, round (val));
7921 return;
7922
7923 case 2: /* mode M: towards -inf. */
7924 if (val < 0.0)
7925 aarch64_set_FP_double (cpu, rd, round (val));
7926 else
7927 aarch64_set_FP_double (cpu, rd, trunc (val));
7928 return;
7929
7930 case 3: /* mode Z: towards 0. */
7931 aarch64_set_FP_double (cpu, rd, trunc (val));
7932 return;
7933
7934 case 4: /* mode A: away from 0. */
7935 aarch64_set_FP_double (cpu, rd, round (val));
7936 return;
7937
7938 case 6: /* mode X: use FPCR with exactness check. */
7939 case 7: /* mode I: use FPCR mode. */
7940 HALT_NYI;
7941
7942 default:
7943 HALT_UNALLOC;
7944 }
7945 }
7946
7947 val = aarch64_get_FP_float (cpu, rs);
7948
7949 switch (rmode)
7950 {
    case 0: /* mode N: nearest, ties to even.  */
      {
	float rval = roundf (val);

	/* As above: roundf () breaks ties away from zero, so steer
	   halfway cases to the even neighbour, e.g. 2.5f -> 2.0f.  */
	if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
	  rval += (val > rval) ? 1.0f : -1.0f;

	aarch64_set_FP_float (cpu, rd, rval);
	return;
      }
7964
7965 case 1: /* mode P: towards +inf. */
7966 if (val < 0.0)
7967 aarch64_set_FP_float (cpu, rd, truncf (val));
7968 else
7969 aarch64_set_FP_float (cpu, rd, roundf (val));
7970 return;
7971
7972 case 2: /* mode M: towards -inf. */
7973 if (val < 0.0)
7974 aarch64_set_FP_float (cpu, rd, truncf (val));
7975 else
7976 aarch64_set_FP_float (cpu, rd, roundf (val));
7977 return;
7978
7979 case 3: /* mode Z: towards 0. */
7980 aarch64_set_FP_float (cpu, rd, truncf (val));
7981 return;
7982
7983 case 4: /* mode A: away from 0. */
7984 aarch64_set_FP_float (cpu, rd, roundf (val));
7985 return;
7986
7987 case 6: /* mode X: use FPCR with exactness check. */
7988 case 7: /* mode I: use FPCR mode. */
7989 HALT_NYI;
7990
7991 default:
7992 HALT_UNALLOC;
7993 }
7994 }
7995
7996 /* Convert half to float. */
7997 static void
7998 do_FCVT_half_to_single (sim_cpu *cpu)
7999 {
8000 unsigned rn = INSTR (9, 5);
8001 unsigned rd = INSTR (4, 0);
8002
8003 NYI_assert (31, 10, 0x7B890);
8004
8005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8006 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8007 }
8008
8009 /* Convert half to double. */
8010 static void
8011 do_FCVT_half_to_double (sim_cpu *cpu)
8012 {
8013 unsigned rn = INSTR (9, 5);
8014 unsigned rd = INSTR (4, 0);
8015
8016 NYI_assert (31, 10, 0x7B8B0);
8017
8018 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8019 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8020 }
8021
8022 static void
8023 do_FCVT_single_to_half (sim_cpu *cpu)
8024 {
8025 unsigned rn = INSTR (9, 5);
8026 unsigned rd = INSTR (4, 0);
8027
8028 NYI_assert (31, 10, 0x788F0);
8029
8030 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8031 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8032 }
8033
8034 /* Convert double to half. */
8035 static void
8036 do_FCVT_double_to_half (sim_cpu *cpu)
8037 {
8038 unsigned rn = INSTR (9, 5);
8039 unsigned rd = INSTR (4, 0);
8040
8041 NYI_assert (31, 10, 0x798F0);
8042
8043 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8044 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8045 }
8046
8047 static void
8048 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8049 {
8050 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8051 instr[30] = 0
8052 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8053 instr[28,25] = 1111
8054 instr[24] = 0
8055 instr[23,22] ==> type : 00 ==> source is single,
8056 01 ==> source is double
8057 10 ==> UNALLOC
8058 11 ==> UNALLOC or source is half
8059 instr[21] = 1
8060 instr[20,15] ==> opcode : with type 00 or 01
8061 000000 ==> FMOV, 000001 ==> FABS,
8062 000010 ==> FNEG, 000011 ==> FSQRT,
8063 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8064 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8065 001000 ==> FRINTN, 001001 ==> FRINTP,
8066 001010 ==> FRINTM, 001011 ==> FRINTZ,
8067 001100 ==> FRINTA, 001101 ==> UNALLOC
8068 001110 ==> FRINTX, 001111 ==> FRINTI
8069 with type 11
8070 000100 ==> FCVT (half-to-single)
8071 000101 ==> FCVT (half-to-double)
8072 instr[14,10] = 10000. */
8073
8074 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8075 uint32_t type = INSTR (23, 22);
8076 uint32_t opcode = INSTR (20, 15);
8077
8078 if (M_S != 0)
8079 HALT_UNALLOC;
8080
8081 if (type == 3)
8082 {
8083 if (opcode == 4)
8084 do_FCVT_half_to_single (cpu);
8085 else if (opcode == 5)
8086 do_FCVT_half_to_double (cpu);
8087 else
8088 HALT_UNALLOC;
8089 return;
8090 }
8091
8092 if (type == 2)
8093 HALT_UNALLOC;
8094
8095 switch (opcode)
8096 {
8097 case 0:
8098 if (type)
8099 ffmovd (cpu);
8100 else
8101 ffmovs (cpu);
8102 return;
8103
8104 case 1:
8105 if (type)
8106 fabcpu (cpu);
8107 else
8108 fabss (cpu);
8109 return;
8110
8111 case 2:
8112 if (type)
8113 fnegd (cpu);
8114 else
8115 fnegs (cpu);
8116 return;
8117
8118 case 3:
8119 if (type)
8120 fsqrtd (cpu);
8121 else
8122 fsqrts (cpu);
8123 return;
8124
8125 case 4:
8126 if (type)
8127 fcvtds (cpu);
8128 else
8129 HALT_UNALLOC;
8130 return;
8131
8132 case 5:
8133 if (type)
8134 HALT_UNALLOC;
8135 fcvtcpu (cpu);
8136 return;
8137
8138 case 8: /* FRINTN etc. */
8139 case 9:
8140 case 10:
8141 case 11:
8142 case 12:
8143 case 14:
8144 case 15:
8145 do_FRINT (cpu);
8146 return;
8147
8148 case 7:
8149 if (INSTR (22, 22))
8150 do_FCVT_double_to_half (cpu);
8151 else
8152 do_FCVT_single_to_half (cpu);
8153 return;
8154
8155 case 13:
8156 HALT_NYI;
8157
8158 default:
8159 HALT_UNALLOC;
8160 }
8161 }
8162
8163 /* 32 bit signed int to float. */
8164 static void
8165 scvtf32 (sim_cpu *cpu)
8166 {
8167 unsigned rn = INSTR (9, 5);
8168 unsigned sd = INSTR (4, 0);
8169
8170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8171 aarch64_set_FP_float
8172 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8173 }
8174
8175 /* signed int to float. */
8176 static void
8177 scvtf (sim_cpu *cpu)
8178 {
8179 unsigned rn = INSTR (9, 5);
8180 unsigned sd = INSTR (4, 0);
8181
8182 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8183 aarch64_set_FP_float
8184 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8185 }
8186
8187 /* 32 bit signed int to double. */
8188 static void
8189 scvtd32 (sim_cpu *cpu)
8190 {
8191 unsigned rn = INSTR (9, 5);
8192 unsigned sd = INSTR (4, 0);
8193
8194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8195 aarch64_set_FP_double
8196 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8197 }
8198
8199 /* signed int to double. */
8200 static void
8201 scvtd (sim_cpu *cpu)
8202 {
8203 unsigned rn = INSTR (9, 5);
8204 unsigned sd = INSTR (4, 0);
8205
8206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8207 aarch64_set_FP_double
8208 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8209 }
8210
8211 static const float FLOAT_INT_MAX = (float) INT_MAX;
8212 static const float FLOAT_INT_MIN = (float) INT_MIN;
8213 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8214 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8215 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8216 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8217 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8218 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8219
8220 #define UINT_MIN 0
8221 #define ULONG_MIN 0
8222 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8223 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8224 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8225 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8226 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8227 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8228 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8229 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
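
/* Several of these bounds are not exactly representable: e.g.
   (float) INT_MAX rounds up to 2^31 and (double) LONG_MAX to 2^63,
   so the >= and <= saturation tests in RAISE_EXCEPTIONS below also
   catch the first values that are strictly out of range.  */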
8230
8231 /* Check for FP exception conditions:
8232 NaN raises IO
8233 Infinity raises IO
8234 Out of Range raises IO and IX and saturates value
8235 Denormal raises ID and IX and sets to zero. */
8236 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8237 do \
8238 { \
8239 switch (fpclassify (F)) \
8240 { \
8241 case FP_INFINITE: \
8242 case FP_NAN: \
      aarch64_set_FPSR_bits (cpu, IO, IO);			\
      if (signbit (F))						\
	VALUE = ITYPE##_MIN;					\
      else							\
	VALUE = ITYPE##_MAX;					\
8248 break; \
8249 \
8250 case FP_NORMAL: \
8251 if (F >= FTYPE##_##ITYPE##_MAX) \
8252 { \
8253 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8254 VALUE = ITYPE##_MAX; \
8255 } \
8256 else if (F <= FTYPE##_##ITYPE##_MIN) \
8257 { \
8258 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8259 VALUE = ITYPE##_MIN; \
8260 } \
8261 break; \
8262 \
8263 case FP_SUBNORMAL: \
8264 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8265 VALUE = 0; \
8266 break; \
8267 \
8268 default: \
8269 case FP_ZERO: \
8270 VALUE = 0; \
8271 break; \
8272 } \
8273 } \
8274 while (0)
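
/* A worked example of the saturation above: converting 1.0e20f to a
   32-bit signed integer takes the FP_NORMAL arm, 1.0e20f >=
   FLOAT_INT_MAX, so the result saturates to INT_MAX with IO and IX
   raised; a subnormal input such as 1.0e-40f is flushed to 0 with IX
   and ID raised.  */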
8275
8276 /* 32 bit convert float to signed int truncate towards zero. */
8277 static void
8278 fcvtszs32 (sim_cpu *cpu)
8279 {
8280 unsigned sn = INSTR (9, 5);
8281 unsigned rd = INSTR (4, 0);
8282 /* TODO : check that this rounds toward zero. */
8283 float f = aarch64_get_FP_float (cpu, sn);
8284 int32_t value = (int32_t) f;
8285
8286 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8287
8288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8289 /* Avoid sign extension to 64 bit. */
8290 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8291 }
8292
8293 /* 64 bit convert float to signed int truncate towards zero. */
8294 static void
8295 fcvtszs (sim_cpu *cpu)
8296 {
8297 unsigned sn = INSTR (9, 5);
8298 unsigned rd = INSTR (4, 0);
8299 float f = aarch64_get_FP_float (cpu, sn);
8300 int64_t value = (int64_t) f;
8301
8302 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8303
8304 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8305 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8306 }
8307
8308 /* 32 bit convert double to signed int truncate towards zero. */
8309 static void
8310 fcvtszd32 (sim_cpu *cpu)
8311 {
8312 unsigned sn = INSTR (9, 5);
8313 unsigned rd = INSTR (4, 0);
8314 /* TODO : check that this rounds toward zero. */
8315 double d = aarch64_get_FP_double (cpu, sn);
8316 int32_t value = (int32_t) d;
8317
8318 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8319
8320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8321 /* Avoid sign extension to 64 bit. */
8322 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8323 }
8324
8325 /* 64 bit convert double to signed int truncate towards zero. */
8326 static void
8327 fcvtszd (sim_cpu *cpu)
8328 {
8329 unsigned sn = INSTR (9, 5);
8330 unsigned rd = INSTR (4, 0);
8331 /* TODO : check that this rounds toward zero. */
8332 double d = aarch64_get_FP_double (cpu, sn);
8333 int64_t value;
8334
8335 value = (int64_t) d;
8336
8337 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8338
8339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8340 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8341 }
8342
8343 static void
8344 do_fcvtzu (sim_cpu *cpu)
8345 {
8346 /* instr[31] = size: 32-bit (0), 64-bit (1)
8347 instr[30,23] = 00111100
8348 instr[22] = type: single (0)/ double (1)
8349 instr[21] = enable (0)/disable(1) precision
8350 instr[20,16] = 11001
8351 instr[15,10] = precision
8352 instr[9,5] = Rs
8353 instr[4,0] = Rd. */
8354
8355 unsigned rs = INSTR (9, 5);
8356 unsigned rd = INSTR (4, 0);
8357
8358 NYI_assert (30, 23, 0x3C);
8359 NYI_assert (20, 16, 0x19);
8360
8361 if (INSTR (21, 21) != 1)
8362 /* Convert to fixed point. */
8363 HALT_NYI;
8364
8365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8366 if (INSTR (31, 31))
8367 {
8368 /* Convert to unsigned 64-bit integer. */
8369 if (INSTR (22, 22))
8370 {
8371 double d = aarch64_get_FP_double (cpu, rs);
8372 uint64_t value = (uint64_t) d;
8373
8374 /* Do not raise an exception if we have reached ULONG_MAX. */
	  if (value != (1ULL << 63))
8376 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8377
8378 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8379 }
8380 else
8381 {
8382 float f = aarch64_get_FP_float (cpu, rs);
8383 uint64_t value = (uint64_t) f;
8384
8385 /* Do not raise an exception if we have reached ULONG_MAX. */
	  if (value != (1ULL << 63))
8387 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8388
8389 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8390 }
8391 }
8392 else
8393 {
8394 uint32_t value;
8395
8396 /* Convert to unsigned 32-bit integer. */
8397 if (INSTR (22, 22))
8398 {
8399 double d = aarch64_get_FP_double (cpu, rs);
8400
8401 value = (uint32_t) d;
8402 /* Do not raise an exception if we have reached UINT_MAX. */
8403 if (value != (1UL << 31))
8404 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8405 }
8406 else
8407 {
8408 float f = aarch64_get_FP_float (cpu, rs);
8409
8410 value = (uint32_t) f;
8411 /* Do not raise an exception if we have reached UINT_MAX. */
8412 if (value != (1UL << 31))
8413 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8414 }
8415
8416 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8417 }
8418 }
8419
8420 static void
8421 do_UCVTF (sim_cpu *cpu)
8422 {
8423 /* instr[31] = size: 32-bit (0), 64-bit (1)
8424 instr[30,23] = 001 1110 0
8425 instr[22] = type: single (0)/ double (1)
8426 instr[21] = 1 ==> integer form, 0 ==> fixed point form (NYI)
8427 instr[20,16] = 0 0011
8428 instr[15,10] = precision
8429 instr[9,5] = Rs
8430 instr[4,0] = Rd. */
8431
8432 unsigned rs = INSTR (9, 5);
8433 unsigned rd = INSTR (4, 0);
8434
8435 NYI_assert (30, 23, 0x3C);
8436 NYI_assert (20, 16, 0x03);
8437
8438 if (INSTR (21, 21) != 1)
8439 HALT_NYI;
8440
8441 /* FIXME: Add exception raising. */
8442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8443 if (INSTR (31, 31))
8444 {
8445 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8446
8447 if (INSTR (22, 22))
8448 aarch64_set_FP_double (cpu, rd, (double) value);
8449 else
8450 aarch64_set_FP_float (cpu, rd, (float) value);
8451 }
8452 else
8453 {
8454 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8455
8456 if (INSTR (22, 22))
8457 aarch64_set_FP_double (cpu, rd, (double) value);
8458 else
8459 aarch64_set_FP_float (cpu, rd, (float) value);
8460 }
8461 }
8462
8463 static void
8464 float_vector_move (sim_cpu *cpu)
8465 {
8466 /* instr[31,17] == 100 1111 0101 0111
8467 instr[16] ==> direction 0=> to GR, 1=> from GR
8468 instr[15,10] => must be 00 0000 (ow UNALLOC)
8469 instr[9,5] ==> source
8470 instr[4,0] ==> dest. */
8471
8472 unsigned rn = INSTR (9, 5);
8473 unsigned rd = INSTR (4, 0);
8474
8475 NYI_assert (31, 17, 0x4F57);
8476
8477 if (INSTR (15, 10) != 0)
8478 HALT_UNALLOC;
8479
8480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8481 if (INSTR (16, 16))
8482 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8483 else
8484 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8485 }
8486
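/* For reference, float_vector_move implements the FMOV forms that
   access the upper half of a 128 bit vector register, i.e.
   FMOV Xd, Vn.D[1] when instr[16] == 0 and FMOV Vd.D[1], Xn when
   instr[16] == 1.  */
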
8487 static void
8488 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8489 {
8490 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8491 instr[30] = 0
8492 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8493 instr[28,25] = 1111
8494 instr[24] = 0
8495 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8496 instr[21] = 1
8497 instr[20,19] = rmode
8498 instr[18,16] = opcode
8499 instr[15,10] = 00 0000 */
8500
8501 uint32_t rmode_opcode;
8502 uint32_t size_type;
8503 uint32_t type;
8504 uint32_t size;
8505 uint32_t S;
8506
8507 if (INSTR (31, 17) == 0x4F57)
8508 {
8509 float_vector_move (cpu);
8510 return;
8511 }
8512
8513 size = INSTR (31, 31);
8514 S = INSTR (29, 29);
8515 if (S != 0)
8516 HALT_UNALLOC;
8517
8518 type = INSTR (23, 22);
8519 if (type > 1)
8520 HALT_UNALLOC;
8521
8522 rmode_opcode = INSTR (20, 16);
8523 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8524
8525 switch (rmode_opcode)
8526 {
8527 case 2: /* SCVTF. */
8528 switch (size_type)
8529 {
8530 case 0: scvtf32 (cpu); return;
8531 case 1: scvtd32 (cpu); return;
8532 case 2: scvtf (cpu); return;
8533 case 3: scvtd (cpu); return;
8534 }
8535
8536 case 6: /* FMOV GR, Vec. */
8537 switch (size_type)
8538 {
8539 case 0: gfmovs (cpu); return;
8540 case 3: gfmovd (cpu); return;
8541 default: HALT_UNALLOC;
8542 }
8543
8544 case 7: /* FMOV vec, GR. */
8545 switch (size_type)
8546 {
8547 case 0: fgmovs (cpu); return;
8548 case 3: fgmovd (cpu); return;
8549 default: HALT_UNALLOC;
8550 }
8551
8552 case 24: /* FCVTZS. */
8553 switch (size_type)
8554 {
8555 case 0: fcvtszs32 (cpu); return;
8556 case 1: fcvtszd32 (cpu); return;
8557 case 2: fcvtszs (cpu); return;
8558 case 3: fcvtszd (cpu); return;
8559 }
8560
8561 case 25: do_fcvtzu (cpu); return;
8562 case 3: do_UCVTF (cpu); return;
8563
8564 case 0: /* FCVTNS. */
8565 case 1: /* FCVTNU. */
8566 case 4: /* FCVTAS. */
8567 case 5: /* FCVTAU. */
8568 case 8: /* FCVTPS. */
8569 case 9: /* FCVTPU. */
8570 case 16: /* FCVTMS. */
8571 case 17: /* FCVTMU. */
8572 default:
8573 HALT_NYI;
8574 }
8575 }
8576
8577 static void
8578 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8579 {
8580 uint32_t flags;
8581
8582 /* FIXME: Add exception raising. */
8583 if (isnan (fvalue1) || isnan (fvalue2))
8584 flags = C|V;
8585 else if (isinf (fvalue1) && isinf (fvalue2))
8586 {
8587 /* Subtracting two infinities may give a NaN. We only need to compare
8588 the signs, which we can get from isinf. */
8589 int result = isinf (fvalue1) - isinf (fvalue2);
8590
8591 if (result == 0)
8592 flags = Z|C;
8593 else if (result < 0)
8594 flags = N;
8595 else /* (result > 0). */
8596 flags = C;
8597 }
8598 else
8599 {
8600 float result = fvalue1 - fvalue2;
8601
8602 if (result == 0.0)
8603 flags = Z|C;
8604 else if (result < 0)
8605 flags = N;
8606 else /* (result > 0). */
8607 flags = C;
8608 }
8609
8610 aarch64_set_CPSR (cpu, flags);
8611 }
8612
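/* For reference, the flag settings above follow the AArch64 FCMP
   definition:

     relation            N Z C V
     value1 == value2    0 1 1 0
     value1 <  value2    1 0 0 0
     value1 >  value2    0 0 1 0
     unordered (a NaN)   0 0 1 1

   so, e.g., comparing 1.0f with a NaN sets just C and V.  */
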
8613 static void
8614 fcmps (sim_cpu *cpu)
8615 {
8616 unsigned sm = INSTR (20, 16);
8617 unsigned sn = INSTR ( 9, 5);
8618
8619 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8620 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8621
8622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8623 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8624 }
8625
8626 /* Float compare to zero -- Invalid Operation exception
8627 only on signaling NaNs. */
8628 static void
8629 fcmpzs (sim_cpu *cpu)
8630 {
8631 unsigned sn = INSTR ( 9, 5);
8632 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8633
8634 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8635 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8636 }
8637
8638 /* Float compare -- Invalid Operation exception on all NaNs. */
8639 static void
8640 fcmpes (sim_cpu *cpu)
8641 {
8642 unsigned sm = INSTR (20, 16);
8643 unsigned sn = INSTR ( 9, 5);
8644
8645 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8646 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8647
8648 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8649 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8650 }
8651
8652 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8653 static void
8654 fcmpzes (sim_cpu *cpu)
8655 {
8656 unsigned sn = INSTR ( 9, 5);
8657 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8658
8659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8660 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8661 }
8662
8663 static void
8664 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8665 {
8666 uint32_t flags;
8667
8668 /* FIXME: Add exception raising. */
8669 if (isnan (dval1) || isnan (dval2))
8670 flags = C|V;
8671 else if (isinf (dval1) && isinf (dval2))
8672 {
8673 /* Subtracting two infinities may give a NaN. We only need to compare
8674 the signs, which we can get from isinf. */
8675 int result = isinf (dval1) - isinf (dval2);
8676
8677 if (result == 0)
8678 flags = Z|C;
8679 else if (result < 0)
8680 flags = N;
8681 else /* (result > 0). */
8682 flags = C;
8683 }
8684 else
8685 {
8686 double result = dval1 - dval2;
8687
8688 if (result == 0.0)
8689 flags = Z|C;
8690 else if (result < 0)
8691 flags = N;
8692 else /* (result > 0). */
8693 flags = C;
8694 }
8695
8696 aarch64_set_CPSR (cpu, flags);
8697 }
8698
8699 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8700 static void
8701 fcmpd (sim_cpu *cpu)
8702 {
8703 unsigned sm = INSTR (20, 16);
8704 unsigned sn = INSTR ( 9, 5);
8705
8706 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8707 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8708
8709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8710 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8711 }
8712
8713 /* Double compare to zero -- Invalid Operation exception
8714 only on signaling NaNs. */
8715 static void
8716 fcmpzd (sim_cpu *cpu)
8717 {
8718 unsigned sn = INSTR ( 9, 5);
8719 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8720
8721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8722 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8723 }
8724
8725 /* Double compare -- Invalid Operation exception on all NaNs. */
8726 static void
8727 fcmped (sim_cpu *cpu)
8728 {
8729 unsigned sm = INSTR (20, 16);
8730 unsigned sn = INSTR ( 9, 5);
8731
8732 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8733 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8734
8735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8736 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8737 }
8738
8739 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8740 static void
8741 fcmpzed (sim_cpu *cpu)
8742 {
8743 unsigned sn = INSTR ( 9, 5);
8744 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8745
8746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8747 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8748 }
8749
8750 static void
8751 dexSimpleFPCompare (sim_cpu *cpu)
8752 {
8753 /* assert instr[28,25] == 1111
8754 instr[30:24:21:13,10] = 0011000
8755 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8756 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8757 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8758 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8759 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8760 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8761 ow ==> UNALLOC */
8762 uint32_t dispatch;
8763 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8764 uint32_t type = INSTR (23, 22);
8765 uint32_t op = INSTR (15, 14);
8766 uint32_t op2_2_0 = INSTR (2, 0);
8767
8768 if (op2_2_0 != 0)
8769 HALT_UNALLOC;
8770
8771 if (M_S != 0)
8772 HALT_UNALLOC;
8773
8774 if (type > 1)
8775 HALT_UNALLOC;
8776
8777 if (op != 0)
8778 HALT_UNALLOC;
8779
8780 /* dispatch on type and top 2 bits of opcode. */
8781 dispatch = (type << 2) | INSTR (4, 3);
8782
8783 switch (dispatch)
8784 {
8785 case 0: fcmps (cpu); return;
8786 case 1: fcmpzs (cpu); return;
8787 case 2: fcmpes (cpu); return;
8788 case 3: fcmpzes (cpu); return;
8789 case 4: fcmpd (cpu); return;
8790 case 5: fcmpzd (cpu); return;
8791 case 6: fcmped (cpu); return;
8792 case 7: fcmpzed (cpu); return;
8793 }
8794 }
8795
8796 static void
8797 do_scalar_FADDP (sim_cpu *cpu)
8798 {
8799 /* instr [31,23] = 0111 1110 0
8800 instr [22] = single(0)/double(1)
8801 instr [21,10] = 11 0000 1101 10
8802 instr [9,5] = Fn
8803 instr [4,0] = Fd. */
8804
8805 unsigned Fn = INSTR (9, 5);
8806 unsigned Fd = INSTR (4, 0);
8807
8808 NYI_assert (31, 23, 0x0FC);
8809 NYI_assert (21, 10, 0xC36);
8810
8811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8812 if (INSTR (22, 22))
8813 {
8814 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8815 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8816
8817 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8818 }
8819 else
8820 {
8821 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8822 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8823
8824 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8825 }
8826 }
8827
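/* Usage sketch: the scalar FADDP forms handled above are
   FADDP Dd, Vn.2D, which writes Vn.D[0] + Vn.D[1] to Dd, and
   FADDP Sd, Vn.2S, which sums the two low single precision
   elements.  */
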
8828 /* Floating point absolute difference. */
8829
8830 static void
8831 do_scalar_FABD (sim_cpu *cpu)
8832 {
8833 /* instr [31,23] = 0111 1110 1
8834 instr [22] = float(0)/double(1)
8835 instr [21] = 1
8836 instr [20,16] = Rm
8837 instr [15,10] = 1101 01
8838 instr [9, 5] = Rn
8839 instr [4, 0] = Rd. */
8840
8841 unsigned rm = INSTR (20, 16);
8842 unsigned rn = INSTR (9, 5);
8843 unsigned rd = INSTR (4, 0);
8844
8845 NYI_assert (31, 23, 0x0FD);
8846 NYI_assert (21, 21, 1);
8847 NYI_assert (15, 10, 0x35);
8848
8849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8850 if (INSTR (22, 22))
8851 aarch64_set_FP_double (cpu, rd,
8852 fabs (aarch64_get_FP_double (cpu, rn)
8853 - aarch64_get_FP_double (cpu, rm)));
8854 else
8855 aarch64_set_FP_float (cpu, rd,
8856 fabsf (aarch64_get_FP_float (cpu, rn)
8857 - aarch64_get_FP_float (cpu, rm)));
8858 }
8859
8860 static void
8861 do_scalar_CMGT (sim_cpu *cpu)
8862 {
8863 /* instr [31,21] = 0101 1110 111
8864 instr [20,16] = Rm
8865 instr [15,10] = 00 1101
8866 instr [9, 5] = Rn
8867 instr [4, 0] = Rd. */
8868
8869 unsigned rm = INSTR (20, 16);
8870 unsigned rn = INSTR (9, 5);
8871 unsigned rd = INSTR (4, 0);
8872
8873 NYI_assert (31, 21, 0x2F7);
8874 NYI_assert (15, 10, 0x0D);
8875
8876 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8877 aarch64_set_vec_u64 (cpu, rd, 0,
8878 aarch64_get_vec_u64 (cpu, rn, 0) >
8879 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8880 }
8881
8882 static void
8883 do_scalar_USHR (sim_cpu *cpu)
8884 {
8885 /* instr [31,23] = 0111 1111 0
8886 instr [22,16] = shift amount
8887 instr [15,10] = 0000 01
8888 instr [9, 5] = Rn
8889 instr [4, 0] = Rd. */
8890
8891 unsigned amount = 128 - INSTR (22, 16);
8892 unsigned rn = INSTR (9, 5);
8893 unsigned rd = INSTR (4, 0);
8894
8895 NYI_assert (31, 23, 0x0FE);
8896 NYI_assert (15, 10, 0x01);
8897
8898 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8899 aarch64_set_vec_u64 (cpu, rd, 0,
8900 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8901 }
8902
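/* Shift-amount arithmetic for the scalar USHR above, for illustration:
   the field instr[22,16] holds 128 - shift for this 64 bit form, so
   USHR D0, D1, #16 is encoded with instr[22,16] = 112 and the code
   recovers 128 - 112 = 16.  */
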
8903 static void
8904 do_scalar_SSHL (sim_cpu *cpu)
8905 {
8906 /* instr [31,21] = 0101 1110 111
8907 instr [20,16] = Rm
8908 instr [15,10] = 0100 01
8909 instr [9, 5] = Rn
8910 instr [4, 0] = Rd. */
8911
8912 unsigned rm = INSTR (20, 16);
8913 unsigned rn = INSTR (9, 5);
8914 unsigned rd = INSTR (4, 0);
8915 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8916
8917 NYI_assert (31, 21, 0x2F7);
8918 NYI_assert (15, 10, 0x11);
8919
8920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8921 if (shift >= 0)
8922 aarch64_set_vec_s64 (cpu, rd, 0,
8923 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8924 else
8925 aarch64_set_vec_s64 (cpu, rd, 0,
8926 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8927 }
8928
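/* The SSHL shift count is a signed byte taken from the bottom of Rm:
   positive counts shift left, negative counts shift right, so a count
   of -4 computes Rn >> 4.  Note that the arithmetic right shift relies
   on the host's signed >> being arithmetic, which is
   implementation-defined in C but true of the usual build hosts.  */
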
8929 /* Floating point scalar compare greater than or equal to 0. */
8930 static void
8931 do_scalar_FCMGE_zero (sim_cpu *cpu)
8932 {
8933 /* instr [31,23] = 0111 1110 1
8934 instr [22,22] = size
8935 instr [21,16] = 1000 00
8936 instr [15,10] = 1100 10
8937 instr [9, 5] = Rn
8938 instr [4, 0] = Rd. */
8939
8940 unsigned size = INSTR (22, 22);
8941 unsigned rn = INSTR (9, 5);
8942 unsigned rd = INSTR (4, 0);
8943
8944 NYI_assert (31, 23, 0x0FD);
8945 NYI_assert (21, 16, 0x20);
8946 NYI_assert (15, 10, 0x32);
8947
8948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8949 if (size)
8950 aarch64_set_vec_u64 (cpu, rd, 0,
8951 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
8952 else
8953 aarch64_set_vec_u32 (cpu, rd, 0,
8954 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
8955 }
8956
8957 /* Floating point scalar compare less than or equal to 0. */
8958 static void
8959 do_scalar_FCMLE_zero (sim_cpu *cpu)
8960 {
8961 /* instr [31,23] = 0111 1110 1
8962 instr [22,22] = size
8963 instr [21,16] = 1000 00
8964 instr [15,10] = 1101 10
8965 instr [9, 5] = Rn
8966 instr [4, 0] = Rd. */
8967
8968 unsigned size = INSTR (22, 22);
8969 unsigned rn = INSTR (9, 5);
8970 unsigned rd = INSTR (4, 0);
8971
8972 NYI_assert (31, 23, 0x0FD);
8973 NYI_assert (21, 16, 0x20);
8974 NYI_assert (15, 10, 0x36);
8975
8976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8977 if (size)
8978 aarch64_set_vec_u64 (cpu, rd, 0,
8979 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
8980 else
8981 aarch64_set_vec_u32 (cpu, rd, 0,
8982 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
8983 }
8984
8985 /* Floating point scalar compare greater than 0. */
8986 static void
8987 do_scalar_FCMGT_zero (sim_cpu *cpu)
8988 {
8989 /* instr [31,23] = 0101 1110 1
8990 instr [22,22] = size
8991 instr [21,16] = 1000 00
8992 instr [15,10] = 1100 10
8993 instr [9, 5] = Rn
8994 instr [4, 0] = Rd. */
8995
8996 unsigned size = INSTR (22, 22);
8997 unsigned rn = INSTR (9, 5);
8998 unsigned rd = INSTR (4, 0);
8999
9000 NYI_assert (31, 23, 0x0BD);
9001 NYI_assert (21, 16, 0x20);
9002 NYI_assert (15, 10, 0x32);
9003
9004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9005 if (size)
9006 aarch64_set_vec_u64 (cpu, rd, 0,
9007 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9008 else
9009 aarch64_set_vec_u32 (cpu, rd, 0,
9010 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9011 }
9012
9013 /* Floating point scalar compare equal to 0. */
9014 static void
9015 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9016 {
9017 /* instr [31,23] = 0101 1110 1
9018 instr [22,22] = size
9019 instr [21,16] = 1000 00
9020 instr [15,10] = 1101 10
9021 instr [9, 5] = Rn
9022 instr [4, 0] = Rd. */
9023
9024 unsigned size = INSTR (22, 22);
9025 unsigned rn = INSTR (9, 5);
9026 unsigned rd = INSTR (4, 0);
9027
9028 NYI_assert (31, 23, 0x0BD);
9029 NYI_assert (21, 16, 0x20);
9030 NYI_assert (15, 10, 0x36);
9031
9032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9033 if (size)
9034 aarch64_set_vec_u64 (cpu, rd, 0,
9035 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9036 else
9037 aarch64_set_vec_u32 (cpu, rd, 0,
9038 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9039 }
9040
9041 /* Floating point scalar compare less than 0. */
9042 static void
9043 do_scalar_FCMLT_zero (sim_cpu *cpu)
9044 {
9045 /* instr [31,23] = 0101 1110 1
9046 instr [22,22] = size
9047 instr [21,16] = 1000 00
9048 instr [15,10] = 1110 10
9049 instr [9, 5] = Rn
9050 instr [4, 0] = Rd. */
9051
9052 unsigned size = INSTR (22, 22);
9053 unsigned rn = INSTR (9, 5);
9054 unsigned rd = INSTR (4, 0);
9055
9056 NYI_assert (31, 23, 0x0BD);
9057 NYI_assert (21, 16, 0x20);
9058 NYI_assert (15, 10, 0x3A);
9059
9060 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9061 if (size)
9062 aarch64_set_vec_u64 (cpu, rd, 0,
9063 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9064 else
9065 aarch64_set_vec_u32 (cpu, rd, 0,
9066 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9067 }
9068
9069 static void
9070 do_scalar_shift (sim_cpu *cpu)
9071 {
9072 /* instr [31,23] = 0101 1111 0
9073 instr [22,16] = shift amount
9074 instr [15,10] = 0101 01 [SHL]
9075 instr [15,10] = 0000 01 [SSHR]
9076 instr [9, 5] = Rn
9077 instr [4, 0] = Rd. */
9078
9079 unsigned rn = INSTR (9, 5);
9080 unsigned rd = INSTR (4, 0);
9081 unsigned amount;
9082
9083 NYI_assert (31, 23, 0x0BE);
9084
9085 if (INSTR (22, 22) == 0)
9086 HALT_UNALLOC;
9087
9088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9089 switch (INSTR (15, 10))
9090 {
9091 case 0x01: /* SSHR */
9092 amount = 128 - INSTR (22, 16);
9093 aarch64_set_vec_s64 (cpu, rd, 0,
9094 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9095 return;
9096 case 0x15: /* SHL */
9097 amount = INSTR (22, 16) - 64;
9098 aarch64_set_vec_u64 (cpu, rd, 0,
9099 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9100 return;
9101 default:
9102 HALT_NYI;
9103 }
9104 }
9105
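/* Immediate decoding examples for the two cases above:
   SSHR D0, D1, #8 encodes instr[22,16] = 120, giving 128 - 120 = 8,
   while SHL D0, D1, #8 encodes instr[22,16] = 72, giving
   72 - 64 = 8.  */
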
9106 /* FCMEQ FCMGT FCMGE. */
9107 static void
9108 do_scalar_FCM (sim_cpu *cpu)
9109 {
9110 /* instr [31,30] = 01
9111 instr [29] = U
9112 instr [28,24] = 1 1110
9113 instr [23] = E
9114 instr [22] = size
9115 instr [21] = 1
9116 instr [20,16] = Rm
9117 instr [15,12] = 1110
9118 instr [11] = AC
9119 instr [10] = 1
9120 instr [9, 5] = Rn
9121 instr [4, 0] = Rd. */
9122
9123 unsigned rm = INSTR (20, 16);
9124 unsigned rn = INSTR (9, 5);
9125 unsigned rd = INSTR (4, 0);
9126 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9127 unsigned result;
9128 float val1;
9129 float val2;
9130
9131 NYI_assert (31, 30, 1);
9132 NYI_assert (28, 24, 0x1E);
9133 NYI_assert (21, 21, 1);
9134 NYI_assert (15, 12, 0xE);
9135 NYI_assert (10, 10, 1);
9136
9137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9138 if (INSTR (22, 22))
9139 {
9140 double val1 = aarch64_get_FP_double (cpu, rn);
9141 double val2 = aarch64_get_FP_double (cpu, rm);
9142
9143 switch (EUac)
9144 {
9145 case 0: /* 000 */
9146 result = val1 == val2;
9147 break;
9148
9149 case 3: /* 011 */
9150 val1 = fabs (val1);
9151 val2 = fabs (val2);
9152 /* Fall through. */
9153 case 2: /* 010 */
9154 result = val1 >= val2;
9155 break;
9156
9157 case 7: /* 111 */
9158 val1 = fabs (val1);
9159 val2 = fabs (val2);
9160 /* Fall through. */
9161 case 6: /* 110 */
9162 result = val1 > val2;
9163 break;
9164
9165 default:
9166 HALT_UNALLOC;
9167 }
9168
9169 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9170 return;
9171 }
9172
9173 val1 = aarch64_get_FP_float (cpu, rn);
9174 val2 = aarch64_get_FP_float (cpu, rm);
9175
9176 switch (EUac)
9177 {
9178 case 0: /* 000 */
9179 result = val1 == val2;
9180 break;
9181
9182 case 3: /* 011 */
9183 val1 = fabsf (val1);
9184 val2 = fabsf (val2);
9185 /* Fall through. */
9186 case 2: /* 010 */
9187 result = val1 >= val2;
9188 break;
9189
9190 case 7: /* 111 */
9191 val1 = fabsf (val1);
9192 val2 = fabsf (val2);
9193 /* Fall through. */
9194 case 6: /* 110 */
9195 result = val1 > val2;
9196 break;
9197
9198 default:
9199 HALT_UNALLOC;
9200 }
9201
9202 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9203 }
9204
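/* For reference, the E:U:ac selector above maps onto the scalar
   compare mnemonics as follows:

     EUac  mnemonic
     000   FCMEQ  (==)
     010   FCMGE  (>=)
     011   FACGE  (absolute >=)
     110   FCMGT  (>)
     111   FACGT  (absolute >)  */
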
9205 /* An alias of DUP. */
9206 static void
9207 do_scalar_MOV (sim_cpu *cpu)
9208 {
9209 /* instr [31,21] = 0101 1110 000
9210 instr [20,16] = imm5
9211 instr [15,10] = 0000 01
9212 instr [9, 5] = Rn
9213 instr [4, 0] = Rd. */
9214
9215 unsigned rn = INSTR (9, 5);
9216 unsigned rd = INSTR (4, 0);
9217 unsigned index;
9218
9219 NYI_assert (31, 21, 0x2F0);
9220 NYI_assert (15, 10, 0x01);
9221
9222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9223 if (INSTR (16, 16))
9224 {
9225 /* 8-bit. */
9226 index = INSTR (20, 17);
9227 aarch64_set_vec_u8
9228 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9229 }
9230 else if (INSTR (17, 17))
9231 {
9232 /* 16-bit. */
9233 index = INSTR (20, 18);
9234 aarch64_set_vec_u16
9235 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9236 }
9237 else if (INSTR (18, 18))
9238 {
9239 /* 32-bit. */
9240 index = INSTR (20, 19);
9241 aarch64_set_vec_u32
9242 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9243 }
9244 else if (INSTR (19, 19))
9245 {
9246 /* 64-bit. */
9247 index = INSTR (20, 20);
9248 aarch64_set_vec_u64
9249 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9250 }
9251 else
9252 HALT_UNALLOC;
9253 }
9254
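/* The element size is given by the position of the lowest set bit of
   imm5 (instr[20,16]) and the index by the bits above it; e.g.
   imm5 = 00110 selects the 16 bit element with index 1, so it copies
   half-word 1 of Vn into the destination (the scalar DUP, whose
   preferred alias is MOV).  */
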
9255 static void
9256 do_scalar_NEG (sim_cpu *cpu)
9257 {
9258 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9259 instr [9, 5] = Rn
9260 instr [4, 0] = Rd. */
9261
9262 unsigned rn = INSTR (9, 5);
9263 unsigned rd = INSTR (4, 0);
9264
9265 NYI_assert (31, 10, 0x1FB82E);
9266
9267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9268 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9269 }
9270
9271 static void
9272 do_scalar_USHL (sim_cpu *cpu)
9273 {
9274 /* instr [31,21] = 0111 1110 111
9275 instr [20,16] = Rm
9276 instr [15,10] = 0100 01
9277 instr [9, 5] = Rn
9278 instr [4, 0] = Rd. */
9279
9280 unsigned rm = INSTR (20, 16);
9281 unsigned rn = INSTR (9, 5);
9282 unsigned rd = INSTR (4, 0);
9283 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9284
9285 NYI_assert (31, 21, 0x3F7);
9286 NYI_assert (15, 10, 0x11);
9287
9288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9289 if (shift >= 0)
9290 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9291 else
9292 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9293 }
9294
9295 static void
9296 do_double_add (sim_cpu *cpu)
9297 {
9298 /* instr [31,21] = 0101 1110 111
9299 instr [20,16] = Fn
9300 instr [15,10] = 1000 01
9301 instr [9,5] = Fm
9302 instr [4,0] = Fd. */
9303 unsigned Fd;
9304 unsigned Fm;
9305 unsigned Fn;
9306 double val1;
9307 double val2;
9308
9309 NYI_assert (31, 21, 0x2F7);
9310 NYI_assert (15, 10, 0x21);
9311
9312 Fd = INSTR (4, 0);
9313 Fm = INSTR (9, 5);
9314 Fn = INSTR (20, 16);
9315
9316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9317 val1 = aarch64_get_FP_double (cpu, Fm);
9318 val2 = aarch64_get_FP_double (cpu, Fn);
9319
9320 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9321 }
9322
9323 static void
9324 do_scalar_UCVTF (sim_cpu *cpu)
9325 {
9326 /* instr [31,23] = 0111 1110 0
9327 instr [22] = single(0)/double(1)
9328 instr [21,10] = 10 0001 1101 10
9329 instr [9,5] = rn
9330 instr [4,0] = rd. */
9331
9332 unsigned rn = INSTR (9, 5);
9333 unsigned rd = INSTR (4, 0);
9334
9335 NYI_assert (31, 23, 0x0FC);
9336 NYI_assert (21, 10, 0x876);
9337
9338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9339 if (INSTR (22, 22))
9340 {
9341 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9342
9343 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9344 }
9345 else
9346 {
9347 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9348
9349 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9350 }
9351 }
9352
9353 static void
9354 do_scalar_vec (sim_cpu *cpu)
9355 {
9356 /* instr [30] = 1. */
9357 /* instr [28,25] = 1111. */
9358 switch (INSTR (31, 23))
9359 {
9360 case 0xBC:
9361 switch (INSTR (15, 10))
9362 {
9363 case 0x01: do_scalar_MOV (cpu); return;
9364 case 0x39: do_scalar_FCM (cpu); return;
9365 case 0x3B: do_scalar_FCM (cpu); return;
9366 }
9367 break;
9368
9369 case 0xBE: do_scalar_shift (cpu); return;
9370
9371 case 0xFC:
9372 switch (INSTR (15, 10))
9373 {
9374 case 0x36:
9375 switch (INSTR (21, 16))
9376 {
9377 case 0x30: do_scalar_FADDP (cpu); return;
9378 case 0x21: do_scalar_UCVTF (cpu); return;
9379 }
9380 HALT_NYI;
9381 case 0x39: do_scalar_FCM (cpu); return;
9382 case 0x3B: do_scalar_FCM (cpu); return;
9383 }
9384 break;
9385
9386 case 0xFD:
9387 switch (INSTR (15, 10))
9388 {
9389 case 0x0D: do_scalar_CMGT (cpu); return;
9390 case 0x11: do_scalar_USHL (cpu); return;
9391 case 0x2E: do_scalar_NEG (cpu); return;
9392 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9393 case 0x35: do_scalar_FABD (cpu); return;
9394 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9395 case 0x39: do_scalar_FCM (cpu); return;
9396 case 0x3B: do_scalar_FCM (cpu); return;
9397 default:
9398 HALT_NYI;
9399 }
9400
9401 case 0xFE: do_scalar_USHR (cpu); return;
9402
9403 case 0xBD:
9404 switch (INSTR (15, 10))
9405 {
9406 case 0x21: do_double_add (cpu); return;
9407 case 0x11: do_scalar_SSHL (cpu); return;
9408 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9409 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9410 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9411 default:
9412 HALT_NYI;
9413 }
9414
9415 default:
9416 HALT_NYI;
9417 }
9418 }
9419
9420 static void
9421 dexAdvSIMD1 (sim_cpu *cpu)
9422 {
9423 /* instr [28,25] = 1 111. */
9424
9425 /* We are currently only interested in the basic
9426 scalar fp routines which all have bit 30 = 0. */
9427 if (INSTR (30, 30))
9428 do_scalar_vec (cpu);
9429
9430 /* instr[24] is set for FP data processing 3-source and clear for
9431 all other basic scalar fp instruction groups. */
9432 else if (INSTR (24, 24))
9433 dexSimpleFPDataProc3Source (cpu);
9434
9435 /* instr[21] is clear for floating <-> fixed conversions and set for
9436 all other basic scalar fp instruction groups. */
9437 else if (!INSTR (21, 21))
9438 dexSimpleFPFixedConvert (cpu);
9439
9440 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9441 11 ==> cond select, 00 ==> other. */
9442 else
9443 switch (INSTR (11, 10))
9444 {
9445 case 1: dexSimpleFPCondCompare (cpu); return;
9446 case 2: dexSimpleFPDataProc2Source (cpu); return;
9447 case 3: dexSimpleFPCondSelect (cpu); return;
9448
9449 default:
9450 /* Now an ordered cascade of tests.
9451 FP immediate has instr [12] == 1.
9452 FP compare has instr [13] == 1.
9453 FP Data Proc 1 Source has instr [14] == 1.
9454 FP floating <--> integer conversions has instr [15] == 0. */
9455 if (INSTR (12, 12))
9456 dexSimpleFPImmediate (cpu);
9457
9458 else if (INSTR (13, 13))
9459 dexSimpleFPCompare (cpu);
9460
9461 else if (INSTR (14, 14))
9462 dexSimpleFPDataProc1Source (cpu);
9463
9464 else if (!INSTR (15, 15))
9465 dexSimpleFPIntegerConvert (cpu);
9466
9467 else
9468 /* If we get here then instr[15] == 1 which means UNALLOC. */
9469 HALT_UNALLOC;
9470 }
9471 }
9472
9473 /* PC relative addressing. */
9474
9475 static void
9476 pcadr (sim_cpu *cpu)
9477 {
9478 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9479 instr[30,29] = immlo
9480 instr[23,5] = immhi. */
9481 uint64_t address;
9482 unsigned rd = INSTR (4, 0);
9483 uint32_t isPage = INSTR (31, 31);
9484 union { int64_t s64; uint64_t u64; } imm;
9485 uint64_t offset;
9486
9487 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9488 offset = imm.u64;
9489 offset = (offset << 2) | INSTR (30, 29);
9490
9491 address = aarch64_get_PC (cpu);
9492
9493 if (isPage)
9494 {
9495 offset <<= 12;
9496 address &= ~0xfff;
9497 }
9498
9499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9500 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9501 }
9502
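/* Worked example: with PC = 0x400123 and a combined immediate of 1,
   ADR computes 0x400123 + 1 = 0x400124, whereas ADRP masks the PC
   down to 0x400000 and scales the immediate to 0x1000, yielding
   0x401000.  */
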
9503 /* Specific decode and execute for group Data Processing Immediate. */
9504
9505 static void
9506 dexPCRelAddressing (sim_cpu *cpu)
9507 {
9508 /* assert instr[28,24] = 10000. */
9509 pcadr (cpu);
9510 }
9511
9512 /* Immediate logical.
9513 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9514 16, 32 or 64 bit sequence pulled out at decode and possibly
9515 inverting it.
9516
9517 N.B. the output register (dest) can normally be Xn or SP;
9518 the exception occurs for flag setting instructions, which may
9519 only use Xn for the output (dest). The input register can
9520 never be SP. */
9521
9522 /* 32 bit and immediate. */
9523 static void
9524 and32 (sim_cpu *cpu, uint32_t bimm)
9525 {
9526 unsigned rn = INSTR (9, 5);
9527 unsigned rd = INSTR (4, 0);
9528
9529 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9530 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9531 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9532 }
9533
9534 /* 64 bit and immediate. */
9535 static void
9536 and64 (sim_cpu *cpu, uint64_t bimm)
9537 {
9538 unsigned rn = INSTR (9, 5);
9539 unsigned rd = INSTR (4, 0);
9540
9541 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9542 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9543 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9544 }
9545
9546 /* 32 bit and immediate set flags. */
9547 static void
9548 ands32 (sim_cpu *cpu, uint32_t bimm)
9549 {
9550 unsigned rn = INSTR (9, 5);
9551 unsigned rd = INSTR (4, 0);
9552
9553 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9554 uint32_t value2 = bimm;
9555
9556 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9557 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9558 set_flags_for_binop32 (cpu, value1 & value2);
9559 }
9560
9561 /* 64 bit and immediate set flags. */
9562 static void
9563 ands64 (sim_cpu *cpu, uint64_t bimm)
9564 {
9565 unsigned rn = INSTR (9, 5);
9566 unsigned rd = INSTR (4, 0);
9567
9568 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9569 uint64_t value2 = bimm;
9570
9571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9572 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9573 set_flags_for_binop64 (cpu, value1 & value2);
9574 }
9575
9576 /* 32 bit exclusive or immediate. */
9577 static void
9578 eor32 (sim_cpu *cpu, uint32_t bimm)
9579 {
9580 unsigned rn = INSTR (9, 5);
9581 unsigned rd = INSTR (4, 0);
9582
9583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9584 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9585 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9586 }
9587
9588 /* 64 bit exclusive or immediate. */
9589 static void
9590 eor64 (sim_cpu *cpu, uint64_t bimm)
9591 {
9592 unsigned rn = INSTR (9, 5);
9593 unsigned rd = INSTR (4, 0);
9594
9595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9596 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9597 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9598 }
9599
9600 /* 32 bit or immediate. */
9601 static void
9602 orr32 (sim_cpu *cpu, uint32_t bimm)
9603 {
9604 unsigned rn = INSTR (9, 5);
9605 unsigned rd = INSTR (4, 0);
9606
9607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9608 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9609 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9610 }
9611
9612 /* 64 bit or immediate. */
9613 static void
9614 orr64 (sim_cpu *cpu, uint64_t bimm)
9615 {
9616 unsigned rn = INSTR (9, 5);
9617 unsigned rd = INSTR (4, 0);
9618
9619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9620 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9621 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9622 }
9623
9624 /* Logical shifted register.
9625 These allow an optional LSL, ASR, LSR or ROR to the second source
9626 register with a count up to the register bit count.
9627 N.B register args may not be SP. */
9628
9629 /* 32 bit AND shifted register. */
9630 static void
9631 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9632 {
9633 unsigned rm = INSTR (20, 16);
9634 unsigned rn = INSTR (9, 5);
9635 unsigned rd = INSTR (4, 0);
9636
9637 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9638 aarch64_set_reg_u64
9639 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9640 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9641 }
9642
9643 /* 64 bit AND shifted register. */
9644 static void
9645 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9646 {
9647 unsigned rm = INSTR (20, 16);
9648 unsigned rn = INSTR (9, 5);
9649 unsigned rd = INSTR (4, 0);
9650
9651 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9652 aarch64_set_reg_u64
9653 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9654 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9655 }
9656
9657 /* 32 bit AND shifted register setting flags. */
9658 static void
9659 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9660 {
9661 unsigned rm = INSTR (20, 16);
9662 unsigned rn = INSTR (9, 5);
9663 unsigned rd = INSTR (4, 0);
9664
9665 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9666 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9667 shift, count);
9668
9669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9670 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9671 set_flags_for_binop32 (cpu, value1 & value2);
9672 }
9673
9674 /* 64 bit AND shifted register setting flags. */
9675 static void
9676 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9677 {
9678 unsigned rm = INSTR (20, 16);
9679 unsigned rn = INSTR (9, 5);
9680 unsigned rd = INSTR (4, 0);
9681
9682 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9683 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9684 shift, count);
9685
9686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9687 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9688 set_flags_for_binop64 (cpu, value1 & value2);
9689 }
9690
9691 /* 32 bit BIC shifted register. */
9692 static void
9693 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9694 {
9695 unsigned rm = INSTR (20, 16);
9696 unsigned rn = INSTR (9, 5);
9697 unsigned rd = INSTR (4, 0);
9698
9699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9700 aarch64_set_reg_u64
9701 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9702 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9703 }
9704
9705 /* 64 bit BIC shifted register. */
9706 static void
9707 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9708 {
9709 unsigned rm = INSTR (20, 16);
9710 unsigned rn = INSTR (9, 5);
9711 unsigned rd = INSTR (4, 0);
9712
9713 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9714 aarch64_set_reg_u64
9715 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9716 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9717 }
9718
9719 /* 32 bit BIC shifted register setting flags. */
9720 static void
9721 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9722 {
9723 unsigned rm = INSTR (20, 16);
9724 unsigned rn = INSTR (9, 5);
9725 unsigned rd = INSTR (4, 0);
9726
9727 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9728 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9729 shift, count);
9730
9731 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9732 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9733 set_flags_for_binop32 (cpu, value1 & value2);
9734 }
9735
9736 /* 64 bit BIC shifted register setting flags. */
9737 static void
9738 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9739 {
9740 unsigned rm = INSTR (20, 16);
9741 unsigned rn = INSTR (9, 5);
9742 unsigned rd = INSTR (4, 0);
9743
9744 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9745 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9746 shift, count);
9747
9748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9749 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9750 set_flags_for_binop64 (cpu, value1 & value2);
9751 }
9752
9753 /* 32 bit EON shifted register. */
9754 static void
9755 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9756 {
9757 unsigned rm = INSTR (20, 16);
9758 unsigned rn = INSTR (9, 5);
9759 unsigned rd = INSTR (4, 0);
9760
9761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9762 aarch64_set_reg_u64
9763 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9764 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9765 }
9766
9767 /* 64 bit EON shifted register. */
9768 static void
9769 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9770 {
9771 unsigned rm = INSTR (20, 16);
9772 unsigned rn = INSTR (9, 5);
9773 unsigned rd = INSTR (4, 0);
9774
9775 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9776 aarch64_set_reg_u64
9777 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9778 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9779 }
9780
9781 /* 32 bit EOR shifted register. */
9782 static void
9783 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9784 {
9785 unsigned rm = INSTR (20, 16);
9786 unsigned rn = INSTR (9, 5);
9787 unsigned rd = INSTR (4, 0);
9788
9789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9790 aarch64_set_reg_u64
9791 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9792 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9793 }
9794
9795 /* 64 bit EOR shifted register. */
9796 static void
9797 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9798 {
9799 unsigned rm = INSTR (20, 16);
9800 unsigned rn = INSTR (9, 5);
9801 unsigned rd = INSTR (4, 0);
9802
9803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9804 aarch64_set_reg_u64
9805 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9806 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9807 }
9808
9809 /* 32 bit ORR shifted register. */
9810 static void
9811 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9812 {
9813 unsigned rm = INSTR (20, 16);
9814 unsigned rn = INSTR (9, 5);
9815 unsigned rd = INSTR (4, 0);
9816
9817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9818 aarch64_set_reg_u64
9819 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9820 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9821 }
9822
9823 /* 64 bit ORR shifted register. */
9824 static void
9825 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9826 {
9827 unsigned rm = INSTR (20, 16);
9828 unsigned rn = INSTR (9, 5);
9829 unsigned rd = INSTR (4, 0);
9830
9831 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9832 aarch64_set_reg_u64
9833 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9834 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9835 }
9836
9837 /* 32 bit ORN shifted register. */
9838 static void
9839 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9840 {
9841 unsigned rm = INSTR (20, 16);
9842 unsigned rn = INSTR (9, 5);
9843 unsigned rd = INSTR (4, 0);
9844
9845 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9846 aarch64_set_reg_u64
9847 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9848 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9849 }
9850
9851 /* 64 bit ORN shifted register. */
9852 static void
9853 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9854 {
9855 unsigned rm = INSTR (20, 16);
9856 unsigned rn = INSTR (9, 5);
9857 unsigned rd = INSTR (4, 0);
9858
9859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9860 aarch64_set_reg_u64
9861 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9862 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9863 }
9864
9865 static void
9866 dexLogicalImmediate (sim_cpu *cpu)
9867 {
9868 /* assert instr[28,23] = 100100
9869 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9870 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9871 instr[22] = N : used to construct immediate mask
9872 instr[21,16] = immr
9873 instr[15,10] = imms
9874 instr[9,5] = Rn
9875 instr[4,0] = Rd */
9876
9877 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9878 uint32_t size = INSTR (31, 31);
9879 uint32_t N = INSTR (22, 22);
9880 /* uint32_t immr = INSTR (21, 16); */
9881 /* uint32_t imms = INSTR (15, 10); */
9882 uint32_t index = INSTR (22, 10);
9883 uint64_t bimm64 = LITable [index];
9884 uint32_t dispatch = INSTR (30, 29);
9885
9886 if (~size & N)
9887 HALT_UNALLOC;
9888
9889 if (!bimm64)
9890 HALT_UNALLOC;
9891
9892 if (size == 0)
9893 {
9894 uint32_t bimm = (uint32_t) bimm64;
9895
9896 switch (dispatch)
9897 {
9898 case 0: and32 (cpu, bimm); return;
9899 case 1: orr32 (cpu, bimm); return;
9900 case 2: eor32 (cpu, bimm); return;
9901 case 3: ands32 (cpu, bimm); return;
9902 }
9903 }
9904 else
9905 {
9906 switch (dispatch)
9907 {
9908 case 0: and64 (cpu, bimm64); return;
9909 case 1: orr64 (cpu, bimm64); return;
9910 case 2: eor64 (cpu, bimm64); return;
9911 case 3: ands64 (cpu, bimm64); return;
9912 }
9913 }
9914 HALT_UNALLOC;
9915 }
9916
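/* Decoding sketch for the LITable lookup above: the 13 bit index is
   N:immr:imms.  For example N = 0, immr = 000000, imms = 111100
   describes a 2 bit element containing a single 1 with no rotation,
   which replicates to bimm64 = 0x5555555555555555; with size == 0 the
   32 bit operation uses the low half, 0x55555555.  */
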
9917 /* Immediate move.
9918 The uimm argument is a 16 bit value to be inserted into the
9919 target register; the pos argument locates the 16 bit word in the
9920 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9921 3} for 64 bit.
9922 N.B. the register arg may not be SP, so it should be
9923 accessed using the setGZRegisterXXX accessors. */
9924
9925 /* 32 bit move 16 bit immediate zero remaining shorts. */
9926 static void
9927 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9928 {
9929 unsigned rd = INSTR (4, 0);
9930
9931 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9932 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9933 }
9934
9935 /* 64 bit move 16 bit immediate zero remaining shorts. */
9936 static void
9937 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9938 {
9939 unsigned rd = INSTR (4, 0);
9940
9941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9942 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9943 }
9944
9945 /* 32 bit move 16 bit immediate negated. */
9946 static void
9947 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9948 {
9949 unsigned rd = INSTR (4, 0);
9950
9951 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9952 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9953 }
9954
9955 /* 64 bit move 16 bit immediate negated. */
9956 static void
9957 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9958 {
9959 unsigned rd = INSTR (4, 0);
9960
9961 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9962 aarch64_set_reg_u64
9963 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9964 ^ 0xffffffffffffffffULL));
9965 }
9966
9967 /* 32 bit move 16 bit immediate keep remaining shorts. */
9968 static void
9969 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9970 {
9971 unsigned rd = INSTR (4, 0);
9972 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9973 uint32_t value = val << (pos * 16);
9974 uint32_t mask = ~(0xffffU << (pos * 16));
9975
9976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9977 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9978 }
9979
9980 /* 64 bit move 16 bit immediate keep remaining shorts. */
9981 static void
9982 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9983 {
9984 unsigned rd = INSTR (4, 0);
9985 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9986 uint64_t value = (uint64_t) val << (pos * 16);
9987 uint64_t mask = ~(0xffffULL << (pos * 16));
9988
9989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9990 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9991 }
9992
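/* Taken together the movz/movn/movk helpers implement the usual
   constant-building idiom, e.g.

     movz x0, #0x1234, lsl #16    ; x0 = 0x0000000012340000
     movk x0, #0x5678             ; x0 = 0x0000000012345678

   where the movk keeps the short already set by the movz.  */
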
9993 static void
9994 dexMoveWideImmediate (sim_cpu *cpu)
9995 {
9996 /* assert instr[28:23] = 100101
9997 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9998 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9999 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10000 instr[20,5] = uimm16
10001 instr[4,0] = Rd */
10002
10003 /* N.B. the (multiple of 16) shift is applied by the called routine,
10004 we just pass the multiplier. */
10005
10006 uint32_t imm;
10007 uint32_t size = INSTR (31, 31);
10008 uint32_t op = INSTR (30, 29);
10009 uint32_t shift = INSTR (22, 21);
10010
10011 /* A 32 bit operation can only shift by 0 or 1 lot of 16;
10012 anything else is an unallocated instruction. */
10013 if (size == 0 && (shift > 1))
10014 HALT_UNALLOC;
10015
10016 if (op == 1)
10017 HALT_UNALLOC;
10018
10019 imm = INSTR (20, 5);
10020
10021 if (size == 0)
10022 {
10023 if (op == 0)
10024 movn32 (cpu, imm, shift);
10025 else if (op == 2)
10026 movz32 (cpu, imm, shift);
10027 else
10028 movk32 (cpu, imm, shift);
10029 }
10030 else
10031 {
10032 if (op == 0)
10033 movn64 (cpu, imm, shift);
10034 else if (op == 2)
10035 movz64 (cpu, imm, shift);
10036 else
10037 movk64 (cpu, imm, shift);
10038 }
10039 }
10040
10041 /* Bitfield operations.
10042 These take a pair of bit positions r and s which are in {0..31}
10043 or {0..63} depending on the instruction word size.
10044 N.B register args may not be SP. */
10045
10046 /* We start with ubfm, which just needs to pick
10047 some bits out of the source, zero the rest and write
10048 the result to dest. This needs just two logical shifts. */
10049
10050 /* 32 bit bitfield move, left and right of affected zeroed
10051 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10052 static void
10053 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10054 {
10055 unsigned rd;
10056 unsigned rn = INSTR (9, 5);
10057 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10058
10059 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10060 if (r <= s)
10061 {
10062 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10063 We want only bits s:xxx:r at the bottom of the word
10064 so we LSL bit s up to bit 31 i.e. by 31 - s
10065 and then we LSR to bring bit 31 down to bit s - r
10066 i.e. by 31 + r - s. */
10067 value <<= 31 - s;
10068 value >>= 31 + r - s;
10069 }
10070 else
10071 {
10072 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10073 We want only bits s:xxx:0 starting at bit 31-(r-1)
10074 so we LSL bit s up to bit 31 i.e. by 31 - s
10075 and then we LSL to bring bit 31 down to 31-(r-1)+s
10076 i.e. by r - (s + 1). */
10077 value <<= 31 - s;
10078 value >>= r - (s + 1);
10079 }
10080
10081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10082 rd = INSTR (4, 0);
10083 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10084 }
10085
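/* Worked example for the two cases above: UBFX W0, W1, #8, #8 is
   UBFM with r = 8, s = 15, so value <<= 16 brings bit 15 up to bit 31
   and value >>= 24 brings it back down to bit 7, leaving W1<15:8> in
   W0<7:0>.  The r > s case covers LSL: a shift of #4 is encoded as
   r = 28, s = 27, giving <<= 4 and >>= 0.  */
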
10086 /* 64 bit bitfield move, left and right of affected zeroed
10087 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10088 static void
10089 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10090 {
10091 unsigned rd;
10092 unsigned rn = INSTR (9, 5);
10093 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10094
10095 if (r <= s)
10096 {
10097 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10098 We want only bits s:xxx:r at the bottom of the word.
10099 So we LSL bit s up to bit 63 i.e. by 63 - s
10100 and then we LSR to bring bit 63 down to bit s - r
10101 i.e. by 63 + r - s. */
10102 value <<= 63 - s;
10103 value >>= 63 + r - s;
10104 }
10105 else
10106 {
10107 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10108 We want only bits s:xxx:0 starting at bit 63-(r-1).
10109 So we LSL bit s up to bit 63 i.e. by 63 - s
10110 and then we LSL to bring bit 63 down to 63-(r-1)+s
10111 i.e. by r - (s + 1). */
10112 value <<= 63 - s;
10113 value >>= r - (s + 1);
10114 }
10115
10116 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10117 rd = INSTR (4, 0);
10118 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10119 }
10120
10121 /* The signed versions need to insert sign bits
10122 on the left of the inserted bit field. so we do
10123 much the same as the unsigned version except we
10124 use an arithmetic shift right -- this just means
10125 we need to operate on signed values. */
10126
10127 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10128 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10129 static void
10130 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10131 {
10132 unsigned rd;
10133 unsigned rn = INSTR (9, 5);
10134 /* as per ubfm32 but use an ASR instead of an LSR. */
10135 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10136
10137 if (r <= s)
10138 {
10139 value <<= 31 - s;
10140 value >>= 31 + r - s;
10141 }
10142 else
10143 {
10144 value <<= 31 - s;
10145 value >>= r - (s + 1);
10146 }
10147
10148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10149 rd = INSTR (4, 0);
10150 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10151 }
10152
10153 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10154 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10155 static void
10156 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10157 {
10158 unsigned rd;
10159 unsigned rn = INSTR (9, 5);
10160 /* as per ubfm but use an ASR instead of an LSR. */
10161 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10162
10163 if (r <= s)
10164 {
10165 value <<= 63 - s;
10166 value >>= 63 + r - s;
10167 }
10168 else
10169 {
10170 value <<= 63 - s;
10171 value >>= r - (s + 1);
10172 }
10173
10174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10175 rd = INSTR (4, 0);
10176 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10177 }
10178
10179 /* Finally, these versions leave non-affected bits
10180 as is. so we need to generate the bits as per
10181 ubfm and also generate a mask to pick the
10182 bits from the original and computed values. */
10183
10184 /* 32 bit bitfield move, non-affected bits left as is.
10185 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10186 static void
10187 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10188 {
10189 unsigned rn = INSTR (9, 5);
10190 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10191 uint32_t mask = -1;
10192 unsigned rd;
10193 uint32_t value2;
10194
10195 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10196 if (r <= s)
10197 {
10198 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10199 We want only bits s:xxx:r at the bottom of the word
10200 so we LSL bit s up to bit 31 i.e. by 31 - s
10201 and then we LSR to bring bit 31 down to bit s - r
10202 i.e. by 31 + r - s. */
10203 value <<= 31 - s;
10204 value >>= 31 + r - s;
10205 /* the mask must include the same bits. */
10206 mask <<= 31 - s;
10207 mask >>= 31 + r - s;
10208 }
10209 else
10210 {
10211 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10212 We want only bits s:xxx:0 starting at bit 31-(r-1)
10213 so we LSL bit s up to bit 31 i.e. by 31 - s
10214 and then we LSL to bring bit 31 down to 31-(r-1)+s
10215 i.e. by r - (s + 1). */
10216 value <<= 31 - s;
10217 value >>= r - (s + 1);
10218 /* The mask must include the same bits. */
10219 mask <<= 31 - s;
10220 mask >>= r - (s + 1);
10221 }
10222
10223 rd = INSTR (4, 0);
10224 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10225
10226 value2 &= ~mask;
10227 value2 |= value;
10228
10229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10230 /* Merge the inserted field with the untouched destination bits. */
10231 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10232 }
10233
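/* e.g. BFI W0, W1, #8, #4 is BFM with r = 24, s = 3: value and mask
   are shifted <<= 28 then >>= 20, placing W1<3:0> and a four bit mask
   at bits <11:8>; the surrounding destination bits pass through
   unchanged.  */
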
10234 /* 64 bit bitfield move, non-affected bits left as is.
10235 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10236 static void
10237 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10238 {
10239 unsigned rd;
10240 unsigned rn = INSTR (9, 5);
10241 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10242 uint64_t mask = 0xffffffffffffffffULL;
10243
10244 if (r <= s)
10245 {
10246 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10247 We want only bits s:xxx:r at the bottom of the word
10248 so we LSL bit s up to bit 63 i.e. by 63 - s
10249 and then we LSR to bring bit 63 down to bit s - r
10250 i.e. by 63 + r - s. */
10251 value <<= 63 - s;
10252 value >>= 63 + r - s;
10253 /* The mask must include the same bits. */
10254 mask <<= 63 - s;
10255 mask >>= 63 + r - s;
10256 }
10257 else
10258 {
10259 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10260 We want only bits s:xxx:0 starting at bit 63-(r-1)
10261 so we LSL bit s up to bit 63 i.e. by 63 - s
10262 and then we LSL to bring bit 63 down to 63-(r-1)+s
10263 i.e. by r - (s + 1). */
10264 value <<= 63 - s;
10265 value >>= r - (s + 1);
10266 /* The mask must include the same bits. */
10267 mask <<= 63 - s;
10268 mask >>= r - (s + 1);
10269 }
10270
10271 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10272 rd = INSTR (4, 0);
10273 aarch64_set_reg_u64
10274 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10275 }
10276
10277 static void
10278 dexBitfieldImmediate (sim_cpu *cpu)
10279 {
10280 /* assert instr[28:23] = 100110
10281 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10282 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10283 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10284 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10285 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10286 instr[9,5] = Rn
10287 instr[4,0] = Rd */
10288
10289 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10290 uint32_t dispatch;
10291 uint32_t imms;
10292 uint32_t size = INSTR (31, 31);
10293 uint32_t N = INSTR (22, 22);
10294 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10295 /* or else we have an UNALLOC. */
10296 uint32_t immr = INSTR (21, 16);
10297
10298 if (~size & N)
10299 HALT_UNALLOC;
10300
10301 if (!size && uimm (immr, 5, 5))
10302 HALT_UNALLOC;
10303
10304 imms = INSTR (15, 10);
10305 if (!size && uimm (imms, 5, 5))
10306 HALT_UNALLOC;
10307
10308 /* Switch on combined size and op. */
10309 dispatch = INSTR (31, 29);
10310 switch (dispatch)
10311 {
10312 case 0: sbfm32 (cpu, immr, imms); return;
10313 case 1: bfm32 (cpu, immr, imms); return;
10314 case 2: ubfm32 (cpu, immr, imms); return;
10315 case 4: sbfm (cpu, immr, imms); return;
10316 case 5: bfm (cpu, immr, imms); return;
10317 case 6: ubfm (cpu, immr, imms); return;
10318 default: HALT_UNALLOC;
10319 }
10320 }
10321
10322 static void
10323 do_EXTR_32 (sim_cpu *cpu)
10324 {
10325 /* instr[31:21] = 00010011100
10326 instr[20,16] = Rm
10327 instr[15,10] = imms : 0xxxxx for 32 bit
10328 instr[9,5] = Rn
10329 instr[4,0] = Rd */
10330 unsigned rm = INSTR (20, 16);
10331 unsigned imms = INSTR (15, 10) & 31;
10332 unsigned rn = INSTR ( 9, 5);
10333 unsigned rd = INSTR ( 4, 0);
10334 uint64_t val1;
10335 uint64_t val2;
10336
10337 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10338 val1 >>= imms;
10339 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10340 /* With imms == 0 the result is just Rm; mask to 32 bits. */
10341 val2 = imms ? (val2 << (32 - imms)) : 0;
10342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10343 aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL);
10344 }
10345
10346 static void
10347 do_EXTR_64 (sim_cpu *cpu)
10348 {
10349 /* instr[31:21] = 10010011100
10350 instr[20,16] = Rm
10351 instr[15,10] = imms
10352 instr[9,5] = Rn
10353 instr[4,0] = Rd */
10354 unsigned rm = INSTR (20, 16);
10355 unsigned imms = INSTR (15, 10) & 63;
10356 unsigned rn = INSTR ( 9, 5);
10357 unsigned rd = INSTR ( 4, 0);
10358 uint64_t val;
10359
10360 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10361 /* Guard against the undefined 64 bit shift when imms == 0. */
10362 if (imms > 0)
10363 val = (val >> imms) | (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10364 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10365 }
10366
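/* EXTR doubles as the rotate idiom: ROR X0, X1, #8 assembles to
   EXTR X0, X1, X1, #8, which the code above computes as
   (X1 >> 8) | (X1 << 56).  */
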
10367 static void
10368 dexExtractImmediate (sim_cpu *cpu)
10369 {
10370 /* assert instr[28:23] = 100111
10371 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10372 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10373 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10374 instr[21] = op0 : must be 0 or UNALLOC
10375 instr[20,16] = Rm
10376 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10377 instr[9,5] = Rn
10378 instr[4,0] = Rd */
10379
10380 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10381 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10382 uint32_t dispatch;
10383 uint32_t size = INSTR (31, 31);
10384 uint32_t N = INSTR (22, 22);
10385 /* 32 bit operations must have imms[5] = 0
10386 or else we have an UNALLOC. */
10387 uint32_t imms = INSTR (15, 10);
10388
10389 if (size ^ N)
10390 HALT_UNALLOC;
10391
10392 if (!size && uimm (imms, 5, 5))
10393 HALT_UNALLOC;
10394
10395 /* Switch on combined size and op. */
10396 dispatch = INSTR (31, 29);
10397
10398 if (dispatch == 0)
10399 do_EXTR_32 (cpu);
10400
10401 else if (dispatch == 4)
10402 do_EXTR_64 (cpu);
10403
10404 else if (dispatch == 1)
10405 HALT_NYI;
10406 else
10407 HALT_UNALLOC;
10408 }
10409
10410 static void
10411 dexDPImm (sim_cpu *cpu)
10412 {
10413 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10414      assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10415 bits [25,23] of a DPImm are the secondary dispatch vector. */
10416 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10417
10418 switch (group2)
10419 {
10420 case DPIMM_PCADR_000:
10421 case DPIMM_PCADR_001:
10422 dexPCRelAddressing (cpu);
10423 return;
10424
10425 case DPIMM_ADDSUB_010:
10426 case DPIMM_ADDSUB_011:
10427 dexAddSubtractImmediate (cpu);
10428 return;
10429
10430 case DPIMM_LOG_100:
10431 dexLogicalImmediate (cpu);
10432 return;
10433
10434 case DPIMM_MOV_101:
10435 dexMoveWideImmediate (cpu);
10436 return;
10437
10438 case DPIMM_BITF_110:
10439 dexBitfieldImmediate (cpu);
10440 return;
10441
10442 case DPIMM_EXTR_111:
10443 dexExtractImmediate (cpu);
10444 return;
10445
10446 default:
10447 /* Should never reach here. */
10448 HALT_NYI;
10449 }
10450 }
10451
10452 static void
10453 dexLoadUnscaledImmediate (sim_cpu *cpu)
10454 {
10455 /* instr[29,24] == 111_00
10456 instr[21] == 0
10457 instr[11,10] == 00
10458 instr[31,30] = size
10459 instr[26] = V
10460 instr[23,22] = opc
10461 instr[20,12] = simm9
10462 instr[9,5] = rn may be SP. */
10463 /* unsigned rt = INSTR (4, 0); */
10464 uint32_t V = INSTR (26, 26);
10465 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10466 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10467
10468 if (!V)
10469 {
10470 /* GReg operations. */
10471 switch (dispatch)
10472 {
10473 case 0: sturb (cpu, imm); return;
10474 case 1: ldurb32 (cpu, imm); return;
10475 case 2: ldursb64 (cpu, imm); return;
10476 case 3: ldursb32 (cpu, imm); return;
10477 case 4: sturh (cpu, imm); return;
10478 case 5: ldurh32 (cpu, imm); return;
10479 case 6: ldursh64 (cpu, imm); return;
10480 case 7: ldursh32 (cpu, imm); return;
10481 case 8: stur32 (cpu, imm); return;
10482 case 9: ldur32 (cpu, imm); return;
10483 case 10: ldursw (cpu, imm); return;
10484 case 12: stur64 (cpu, imm); return;
10485 case 13: ldur64 (cpu, imm); return;
10486
10487 case 14:
10488 /* PRFUM NYI. */
10489 HALT_NYI;
10490
10491 default:
10492 case 11:
10493 case 15:
10494 HALT_UNALLOC;
10495 }
10496 }
10497
10498 /* FReg operations. */
10499 switch (dispatch)
10500 {
10501 case 2: fsturq (cpu, imm); return;
10502 case 3: fldurq (cpu, imm); return;
10503 case 8: fsturs (cpu, imm); return;
10504 case 9: fldurs (cpu, imm); return;
10505 case 12: fsturd (cpu, imm); return;
10506 case 13: fldurd (cpu, imm); return;
10507
10508 case 0: /* STUR 8 bit FP. */
10509 case 1: /* LDUR 8 bit FP. */
10510 case 4: /* STUR 16 bit FP. */
10511     case 5: /* LDUR 16 bit FP. */
10512 HALT_NYI;
10513
10514 default:
10515 case 6:
10516 case 7:
10517 case 10:
10518 case 11:
10519 case 14:
10520 case 15:
10521 HALT_UNALLOC;
10522 }
10523 }
10524
10525 /* N.B. A preliminary note regarding all the ldrs<x>32
10526 instructions
10527
10528    The signed value loaded by these instructions is cast to unsigned
10529    before being passed to aarch64_set_reg_u64 (cpu, N, ...), i.e. to the
10530    64 bit element of the GReg union.  This performs a 32 bit sign extension
10531    (as required) but avoids 64 bit sign extension, thus ensuring that the
10532    top half of the register word is zero.  This is what the spec demands
10533    when a 32 bit load occurs.  */
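
/* A stand-alone illustration of the cast pattern described above
   (sketch only, not part of the simulator): the conversion through a
   32-bit unsigned type sign-extends to 32 bits and stops there, so
   widening to 64 bits zero-fills the top half.  */

static inline uint64_t
widen_s8_for_w_reg_sketch (int8_t loaded)
{
  uint32_t w = (uint32_t) loaded;   /* -1 becomes 0xffffffff.  */
  return (uint64_t) w;              /* 0x00000000ffffffff.  */
}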
10534
10535 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10536 static void
10537 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10538 {
10539 unsigned int rn = INSTR (9, 5);
10540 unsigned int rt = INSTR (4, 0);
10541
10542   /* The target register may not be SP but the source may be.
10543      There is no scaling required for a byte load. */
10544 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10545 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10546                        (uint32_t) aarch64_get_mem_s8 (cpu, address));
10547 }
10548
10549 /* 32 bit load sign-extended byte scaled or unscaled zero-
10550 or sign-extended 32-bit register offset. */
10551 static void
10552 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10553 {
10554 unsigned int rm = INSTR (20, 16);
10555 unsigned int rn = INSTR (9, 5);
10556 unsigned int rt = INSTR (4, 0);
10557
10558 /* rn may reference SP, rm and rt must reference ZR. */
10559
10560 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10561 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10562 extension);
10563
10564 /* There is no scaling required for a byte load. */
10565 aarch64_set_reg_u64
10566     (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10567 + displacement));
10568 }
10569
10570 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10571 pre- or post-writeback. */
10572 static void
10573 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10574 {
10575 uint64_t address;
10576 unsigned int rn = INSTR (9, 5);
10577 unsigned int rt = INSTR (4, 0);
10578
10579 if (rn == rt && wb != NoWriteBack)
10580 HALT_UNALLOC;
10581
10582 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10583
10584 if (wb == Pre)
10585 address += offset;
10586
10587 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10588                        (uint32_t) aarch64_get_mem_s8 (cpu, address));
10589
10590 if (wb == Post)
10591 address += offset;
10592
10593 if (wb != NoWriteBack)
10594 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10595 }
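
/* The pre/post writeback pattern above recurs throughout this file; a
   sketch of it in isolation (illustrative only, not used by the
   simulator): Pre applies the offset before the access, Post after it,
   and in both cases the updated address is written back to the base.  */

static inline uint64_t
writeback_address_sketch (uint64_t base, int32_t offset, WriteBack wb,
                          uint64_t *new_base)
{
  /* Only Pre and Post reach the *_wb helpers (selected by instr[11]).  */
  uint64_t access = (wb == Pre) ? base + offset : base;

  *new_base = base + offset;    /* Both modes update the base.  */
  return access;                /* The address the access uses.  */
}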
10596
10597 /* 8 bit store scaled. */
10598 static void
10599 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10600 {
10601 unsigned st = INSTR (4, 0);
10602 unsigned rn = INSTR (9, 5);
10603
10604 aarch64_set_mem_u8 (cpu,
10605 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10606 aarch64_get_vec_u8 (cpu, st, 0));
10607 }
10608
10609 /* 8 bit store scaled or unscaled zero- or
10610 sign-extended 8-bit register offset. */
10611 static void
10612 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10613 {
10614 unsigned rm = INSTR (20, 16);
10615 unsigned rn = INSTR (9, 5);
10616 unsigned st = INSTR (4, 0);
10617
10618 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10619 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10620 extension);
10621 uint64_t displacement = scaling == Scaled ? extended : 0;
10622
10623 aarch64_set_mem_u8
10624 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10625 }
10626
10627 /* 16 bit store scaled. */
10628 static void
10629 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10630 {
10631 unsigned st = INSTR (4, 0);
10632 unsigned rn = INSTR (9, 5);
10633
10634 aarch64_set_mem_u16
10635 (cpu,
10636 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10637 aarch64_get_vec_u16 (cpu, st, 0));
10638 }
10639
10640 /* 16 bit store scaled or unscaled zero-
10641 or sign-extended 16-bit register offset. */
10642 static void
10643 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10644 {
10645 unsigned rm = INSTR (20, 16);
10646 unsigned rn = INSTR (9, 5);
10647 unsigned st = INSTR (4, 0);
10648
10649 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10650 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10651 extension);
10652 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10653
10654 aarch64_set_mem_u16
10655 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10656 }
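
/* A sketch of what the OPT_SCALE macro used above is assumed to do
   (illustrative only, not part of the simulator): scale the extended
   register offset by the transfer size, but only when the S bit
   requested a scaled offset.  */

static inline uint64_t
opt_scale_sketch (int64_t extended, unsigned bits, Scaling scaling)
{
  unsigned shift = 0;

  while ((1u << shift) * 8 < bits)      /* log2 (bits / 8).  */
    shift++;

  return scaling == Scaled ? (uint64_t) extended << shift
                           : (uint64_t) extended;
}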
10657
10658 /* 32 bit store scaled unsigned 12 bit. */
10659 static void
10660 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10661 {
10662 unsigned st = INSTR (4, 0);
10663 unsigned rn = INSTR (9, 5);
10664
10665 aarch64_set_mem_u32
10666 (cpu,
10667 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10668 aarch64_get_vec_u32 (cpu, st, 0));
10669 }
10670
10671 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10672 static void
10673 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10674 {
10675 unsigned rn = INSTR (9, 5);
10676 unsigned st = INSTR (4, 0);
10677
10678 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10679
10680 if (wb != Post)
10681 address += offset;
10682
10683 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10684
10685 if (wb == Post)
10686 address += offset;
10687
10688 if (wb != NoWriteBack)
10689 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10690 }
10691
10692 /* 32 bit store scaled or unscaled zero-
10693 or sign-extended 32-bit register offset. */
10694 static void
10695 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10696 {
10697 unsigned rm = INSTR (20, 16);
10698 unsigned rn = INSTR (9, 5);
10699 unsigned st = INSTR (4, 0);
10700
10701 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10702 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10703 extension);
10704 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10705
10706 aarch64_set_mem_u32
10707 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10708 }
10709
10710 /* 64 bit store scaled unsigned 12 bit. */
10711 static void
10712 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10713 {
10714 unsigned st = INSTR (4, 0);
10715 unsigned rn = INSTR (9, 5);
10716
10717 aarch64_set_mem_u64
10718 (cpu,
10719 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10720 aarch64_get_vec_u64 (cpu, st, 0));
10721 }
10722
10723 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10724 static void
10725 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10726 {
10727 unsigned rn = INSTR (9, 5);
10728 unsigned st = INSTR (4, 0);
10729
10730 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10731
10732 if (wb != Post)
10733 address += offset;
10734
10735 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10736
10737 if (wb == Post)
10738 address += offset;
10739
10740 if (wb != NoWriteBack)
10741 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10742 }
10743
10744 /* 64 bit store scaled or unscaled zero-
10745 or sign-extended 32-bit register offset. */
10746 static void
10747 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10748 {
10749 unsigned rm = INSTR (20, 16);
10750 unsigned rn = INSTR (9, 5);
10751 unsigned st = INSTR (4, 0);
10752
10753 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10754 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10755 extension);
10756 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10757
10758 aarch64_set_mem_u64
10759 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10760 }
10761
10762 /* 128 bit store scaled unsigned 12 bit. */
10763 static void
10764 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10765 {
10766 FRegister a;
10767 unsigned st = INSTR (4, 0);
10768 unsigned rn = INSTR (9, 5);
10769 uint64_t addr;
10770
10771 aarch64_get_FP_long_double (cpu, st, & a);
10772
10773 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10774 aarch64_set_mem_long_double (cpu, addr, a);
10775 }
10776
10777 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10778 static void
10779 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10780 {
10781 FRegister a;
10782 unsigned rn = INSTR (9, 5);
10783 unsigned st = INSTR (4, 0);
10784 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10785
10786 if (wb != Post)
10787 address += offset;
10788
10789 aarch64_get_FP_long_double (cpu, st, & a);
10790 aarch64_set_mem_long_double (cpu, address, a);
10791
10792 if (wb == Post)
10793 address += offset;
10794
10795 if (wb != NoWriteBack)
10796 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10797 }
10798
10799 /* 128 bit store scaled or unscaled zero-
10800 or sign-extended 32-bit register offset. */
10801 static void
10802 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10803 {
10804 unsigned rm = INSTR (20, 16);
10805 unsigned rn = INSTR (9, 5);
10806 unsigned st = INSTR (4, 0);
10807
10808 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10809 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10810 extension);
10811 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10812
10813 FRegister a;
10814
10815 aarch64_get_FP_long_double (cpu, st, & a);
10816 aarch64_set_mem_long_double (cpu, address + displacement, a);
10817 }
10818
10819 static void
10820 dexLoadImmediatePrePost (sim_cpu *cpu)
10821 {
10822 /* instr[31,30] = size
10823 instr[29,27] = 111
10824 instr[26] = V
10825 instr[25,24] = 00
10826 instr[23,22] = opc
10827 instr[21] = 0
10828 instr[20,12] = simm9
10829 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10830 instr[10] = 0
10831 instr[9,5] = Rn may be SP.
10832 instr[4,0] = Rt */
10833
10834 uint32_t V = INSTR (26, 26);
10835 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10836 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10837 WriteBack wb = INSTR (11, 11);
10838
10839 if (!V)
10840 {
10841 /* GReg operations. */
10842 switch (dispatch)
10843 {
10844 case 0: strb_wb (cpu, imm, wb); return;
10845 case 1: ldrb32_wb (cpu, imm, wb); return;
10846 case 2: ldrsb_wb (cpu, imm, wb); return;
10847 case 3: ldrsb32_wb (cpu, imm, wb); return;
10848 case 4: strh_wb (cpu, imm, wb); return;
10849 case 5: ldrh32_wb (cpu, imm, wb); return;
10850 case 6: ldrsh64_wb (cpu, imm, wb); return;
10851 case 7: ldrsh32_wb (cpu, imm, wb); return;
10852 case 8: str32_wb (cpu, imm, wb); return;
10853 case 9: ldr32_wb (cpu, imm, wb); return;
10854 case 10: ldrsw_wb (cpu, imm, wb); return;
10855 case 12: str_wb (cpu, imm, wb); return;
10856 case 13: ldr_wb (cpu, imm, wb); return;
10857
10858 default:
10859 case 11:
10860 case 14:
10861 case 15:
10862 HALT_UNALLOC;
10863 }
10864 }
10865
10866 /* FReg operations. */
10867 switch (dispatch)
10868 {
10869 case 2: fstrq_wb (cpu, imm, wb); return;
10870 case 3: fldrq_wb (cpu, imm, wb); return;
10871 case 8: fstrs_wb (cpu, imm, wb); return;
10872 case 9: fldrs_wb (cpu, imm, wb); return;
10873 case 12: fstrd_wb (cpu, imm, wb); return;
10874 case 13: fldrd_wb (cpu, imm, wb); return;
10875
10876     case 0: /* STR 8 bit FP. */
10877     case 1: /* LDR 8 bit FP. */
10878     case 4: /* STR 16 bit FP. */
10879     case 5: /* LDR 16 bit FP. */
10880 HALT_NYI;
10881
10882 default:
10883 case 6:
10884 case 7:
10885 case 10:
10886 case 11:
10887 case 14:
10888 case 15:
10889 HALT_UNALLOC;
10890 }
10891 }
10892
10893 static void
10894 dexLoadRegisterOffset (sim_cpu *cpu)
10895 {
10896 /* instr[31,30] = size
10897 instr[29,27] = 111
10898 instr[26] = V
10899 instr[25,24] = 00
10900 instr[23,22] = opc
10901 instr[21] = 1
10902 instr[20,16] = rm
10903 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10904 110 ==> SXTW, 111 ==> SXTX,
10905 ow ==> RESERVED
10906 instr[12] = scaled
10907 instr[11,10] = 10
10908 instr[9,5] = rn
10909 instr[4,0] = rt. */
10910
10911 uint32_t V = INSTR (26, 26);
10912 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10913 Scaling scale = INSTR (12, 12);
10914 Extension extensionType = INSTR (15, 13);
10915
10916 /* Check for illegal extension types. */
10917 if (uimm (extensionType, 1, 1) == 0)
10918 HALT_UNALLOC;
10919
10920 if (extensionType == UXTX || extensionType == SXTX)
10921 extensionType = NoExtension;
10922
10923 if (!V)
10924 {
10925 /* GReg operations. */
10926 switch (dispatch)
10927 {
10928 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10929 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10930 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10931 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10932 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10933 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10934 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10935 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10936 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10937 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10938 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10939 case 12: str_scale_ext (cpu, scale, extensionType); return;
10940 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10941 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10942
10943 default:
10944 case 11:
10945 case 15:
10946 HALT_UNALLOC;
10947 }
10948 }
10949
10950 /* FReg operations. */
10951 switch (dispatch)
10952 {
10953     case 1: /* LDR 8 bit FP. */
10954 HALT_NYI;
10955 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10956     case 5: /* LDR 16 bit FP. */
10957 HALT_NYI;
10958 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10959 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10960
10961 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10962 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10963 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10964 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10965 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10966
10967 default:
10968 case 6:
10969 case 7:
10970 case 10:
10971 case 11:
10972 case 14:
10973 case 15:
10974 HALT_UNALLOC;
10975 }
10976 }
10977
10978 static void
10979 dexLoadUnsignedImmediate (sim_cpu *cpu)
10980 {
10981 /* instr[29,24] == 111_01
10982 instr[31,30] = size
10983 instr[26] = V
10984 instr[23,22] = opc
10985 instr[21,10] = uimm12 : unsigned immediate offset
10986 instr[9,5] = rn may be SP.
10987 instr[4,0] = rt. */
10988
10989 uint32_t V = INSTR (26,26);
10990 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10991 uint32_t imm = INSTR (21, 10);
10992
10993 if (!V)
10994 {
10995 /* GReg operations. */
10996 switch (dispatch)
10997 {
10998 case 0: strb_abs (cpu, imm); return;
10999 case 1: ldrb32_abs (cpu, imm); return;
11000 case 2: ldrsb_abs (cpu, imm); return;
11001 case 3: ldrsb32_abs (cpu, imm); return;
11002 case 4: strh_abs (cpu, imm); return;
11003 case 5: ldrh32_abs (cpu, imm); return;
11004 case 6: ldrsh_abs (cpu, imm); return;
11005 case 7: ldrsh32_abs (cpu, imm); return;
11006 case 8: str32_abs (cpu, imm); return;
11007 case 9: ldr32_abs (cpu, imm); return;
11008 case 10: ldrsw_abs (cpu, imm); return;
11009 case 12: str_abs (cpu, imm); return;
11010 case 13: ldr_abs (cpu, imm); return;
11011 case 14: prfm_abs (cpu, imm); return;
11012
11013 default:
11014 case 11:
11015 case 15:
11016 HALT_UNALLOC;
11017 }
11018 }
11019
11020 /* FReg operations. */
11021 switch (dispatch)
11022 {
11023 case 0: fstrb_abs (cpu, imm); return;
11024 case 4: fstrh_abs (cpu, imm); return;
11025 case 8: fstrs_abs (cpu, imm); return;
11026 case 12: fstrd_abs (cpu, imm); return;
11027 case 2: fstrq_abs (cpu, imm); return;
11028
11029 case 1: fldrb_abs (cpu, imm); return;
11030 case 5: fldrh_abs (cpu, imm); return;
11031 case 9: fldrs_abs (cpu, imm); return;
11032 case 13: fldrd_abs (cpu, imm); return;
11033 case 3: fldrq_abs (cpu, imm); return;
11034
11035 default:
11036 case 6:
11037 case 7:
11038 case 10:
11039 case 11:
11040 case 14:
11041 case 15:
11042 HALT_UNALLOC;
11043 }
11044 }
11045
11046 static void
11047 dexLoadExclusive (sim_cpu *cpu)
11048 {
11049 /* assert instr[29:24] = 001000;
11050 instr[31,30] = size
11051 instr[23] = 0 if exclusive
11052 instr[22] = L : 1 if load, 0 if store
11053 instr[21] = 1 if pair
11054 instr[20,16] = Rs
11055 instr[15] = o0 : 1 if ordered
11056 instr[14,10] = Rt2
11057 instr[9,5] = Rn
11058      instr[4,0] = Rt. */
11059
11060 switch (INSTR (22, 21))
11061 {
11062 case 2: ldxr (cpu); return;
11063 case 0: stxr (cpu); return;
11064 default: HALT_NYI;
11065 }
11066 }
11067
11068 static void
11069 dexLoadOther (sim_cpu *cpu)
11070 {
11071 uint32_t dispatch;
11072
11073 /* instr[29,25] = 111_0
11074 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11075      instr[21] and instr[11,10] form the secondary dispatch vector. */
11076 if (INSTR (24, 24))
11077 {
11078 dexLoadUnsignedImmediate (cpu);
11079 return;
11080 }
11081
11082 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11083 switch (dispatch)
11084 {
11085 case 0: dexLoadUnscaledImmediate (cpu); return;
11086 case 1: dexLoadImmediatePrePost (cpu); return;
11087 case 3: dexLoadImmediatePrePost (cpu); return;
11088 case 6: dexLoadRegisterOffset (cpu); return;
11089
11090 default:
11091 case 2:
11092 case 4:
11093 case 5:
11094 case 7:
11095 HALT_NYI;
11096 }
11097 }
11098
11099 static void
11100 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11101 {
11102 unsigned rn = INSTR (14, 10);
11103 unsigned rd = INSTR (9, 5);
11104 unsigned rm = INSTR (4, 0);
11105 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11106
11107 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11108     HALT_UNALLOC; /* Base overlapping a data register with writeback is UNPREDICTABLE.  */
11109
11110 offset <<= 2;
11111
11112 if (wb != Post)
11113 address += offset;
11114
11115 aarch64_set_mem_u32 (cpu, address,
11116 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11117 aarch64_set_mem_u32 (cpu, address + 4,
11118 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11119
11120 if (wb == Post)
11121 address += offset;
11122
11123 if (wb != NoWriteBack)
11124 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11125 }
11126
11127 static void
11128 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11129 {
11130 unsigned rn = INSTR (14, 10);
11131 unsigned rd = INSTR (9, 5);
11132 unsigned rm = INSTR (4, 0);
11133 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11134
11135 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11136     HALT_UNALLOC; /* Base overlapping a data register with writeback is UNPREDICTABLE.  */
11137
11138 offset <<= 3;
11139
11140 if (wb != Post)
11141 address += offset;
11142
11143 aarch64_set_mem_u64 (cpu, address,
11144 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11145 aarch64_set_mem_u64 (cpu, address + 8,
11146 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11147
11148 if (wb == Post)
11149 address += offset;
11150
11151 if (wb != NoWriteBack)
11152 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11153 }
11154
11155 static void
11156 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11157 {
11158 unsigned rn = INSTR (14, 10);
11159 unsigned rd = INSTR (9, 5);
11160 unsigned rm = INSTR (4, 0);
11161 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11162
11163 /* Treat this as unalloc to make sure we don't do it. */
11164 if (rn == rm)
11165 HALT_UNALLOC;
11166
11167 offset <<= 2;
11168
11169 if (wb != Post)
11170 address += offset;
11171
11172 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11173 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11174
11175 if (wb == Post)
11176 address += offset;
11177
11178 if (wb != NoWriteBack)
11179 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11180 }
11181
11182 static void
11183 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11184 {
11185 unsigned rn = INSTR (14, 10);
11186 unsigned rd = INSTR (9, 5);
11187 unsigned rm = INSTR (4, 0);
11188 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11189
11190 /* Treat this as unalloc to make sure we don't do it. */
11191 if (rn == rm)
11192 HALT_UNALLOC;
11193
11194 offset <<= 2;
11195
11196 if (wb != Post)
11197 address += offset;
11198
11199 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11200 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11201
11202 if (wb == Post)
11203 address += offset;
11204
11205 if (wb != NoWriteBack)
11206 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11207 }
11208
11209 static void
11210 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11211 {
11212 unsigned rn = INSTR (14, 10);
11213 unsigned rd = INSTR (9, 5);
11214 unsigned rm = INSTR (4, 0);
11215 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11216
11217 /* Treat this as unalloc to make sure we don't do it. */
11218 if (rn == rm)
11219 HALT_UNALLOC;
11220
11221 offset <<= 3;
11222
11223 if (wb != Post)
11224 address += offset;
11225
11226 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11227 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11228
11229 if (wb == Post)
11230 address += offset;
11231
11232 if (wb != NoWriteBack)
11233 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11234 }
11235
11236 static void
11237 dex_load_store_pair_gr (sim_cpu *cpu)
11238 {
11239 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11240      instr[29,25] = instruction encoding: 101_0 (_ is the V bit)
11241      instr[26] = V : 1 if fp, 0 if gp
11242 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11243 instr[22] = load/store (1=> load)
11244 instr[21,15] = signed, scaled, offset
11245 instr[14,10] = Rn
11246 instr[ 9, 5] = Rd
11247 instr[ 4, 0] = Rm. */
11248
11249 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11250 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11251
11252 switch (dispatch)
11253 {
11254 case 2: store_pair_u32 (cpu, offset, Post); return;
11255 case 3: load_pair_u32 (cpu, offset, Post); return;
11256 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11257 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11258 case 6: store_pair_u32 (cpu, offset, Pre); return;
11259 case 7: load_pair_u32 (cpu, offset, Pre); return;
11260
11261 case 11: load_pair_s32 (cpu, offset, Post); return;
11262 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11263 case 15: load_pair_s32 (cpu, offset, Pre); return;
11264
11265 case 18: store_pair_u64 (cpu, offset, Post); return;
11266 case 19: load_pair_u64 (cpu, offset, Post); return;
11267 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11268 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11269 case 22: store_pair_u64 (cpu, offset, Pre); return;
11270 case 23: load_pair_u64 (cpu, offset, Pre); return;
11271
11272 default:
11273 HALT_UNALLOC;
11274 }
11275 }
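
/* Worked example of the dispatch above (for reference):
   STP X0, X1, [SP, #-16]! has size = 10, addressing mode = 11 (pre)
   and L = 0, so dispatch = (2 << 3) | 0b110 = 22, selecting
   store_pair_u64 with Pre writeback; imm7 = -2 is scaled by 8 to the
   byte offset -16.  */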
11276
11277 static void
11278 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11279 {
11280 unsigned rn = INSTR (14, 10);
11281 unsigned rd = INSTR (9, 5);
11282 unsigned rm = INSTR (4, 0);
11283 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11284
11285 offset <<= 2;
11286
11287 if (wb != Post)
11288 address += offset;
11289
11290 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11291 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11292
11293 if (wb == Post)
11294 address += offset;
11295
11296 if (wb != NoWriteBack)
11297 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11298 }
11299
11300 static void
11301 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11302 {
11303 unsigned rn = INSTR (14, 10);
11304 unsigned rd = INSTR (9, 5);
11305 unsigned rm = INSTR (4, 0);
11306 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11307
11308 offset <<= 3;
11309
11310 if (wb != Post)
11311 address += offset;
11312
11313 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11314 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11315
11316 if (wb == Post)
11317 address += offset;
11318
11319 if (wb != NoWriteBack)
11320 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11321 }
11322
11323 static void
11324 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11325 {
11326 FRegister a;
11327 unsigned rn = INSTR (14, 10);
11328 unsigned rd = INSTR (9, 5);
11329 unsigned rm = INSTR (4, 0);
11330 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11331
11332 offset <<= 4;
11333
11334 if (wb != Post)
11335 address += offset;
11336
11337 aarch64_get_FP_long_double (cpu, rm, & a);
11338 aarch64_set_mem_long_double (cpu, address, a);
11339 aarch64_get_FP_long_double (cpu, rn, & a);
11340 aarch64_set_mem_long_double (cpu, address + 16, a);
11341
11342 if (wb == Post)
11343 address += offset;
11344
11345 if (wb != NoWriteBack)
11346 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11347 }
11348
11349 static void
11350 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11351 {
11352 unsigned rn = INSTR (14, 10);
11353 unsigned rd = INSTR (9, 5);
11354 unsigned rm = INSTR (4, 0);
11355 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11356
11357 if (rm == rn)
11358 HALT_UNALLOC;
11359
11360 offset <<= 2;
11361
11362 if (wb != Post)
11363 address += offset;
11364
11365 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11366 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11367
11368 if (wb == Post)
11369 address += offset;
11370
11371 if (wb != NoWriteBack)
11372 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11373 }
11374
11375 static void
11376 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11377 {
11378 unsigned rn = INSTR (14, 10);
11379 unsigned rd = INSTR (9, 5);
11380 unsigned rm = INSTR (4, 0);
11381 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11382
11383 if (rm == rn)
11384 HALT_UNALLOC;
11385
11386 offset <<= 3;
11387
11388 if (wb != Post)
11389 address += offset;
11390
11391 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11392 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11393
11394 if (wb == Post)
11395 address += offset;
11396
11397 if (wb != NoWriteBack)
11398 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11399 }
11400
11401 static void
11402 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11403 {
11404 FRegister a;
11405 unsigned rn = INSTR (14, 10);
11406 unsigned rd = INSTR (9, 5);
11407 unsigned rm = INSTR (4, 0);
11408 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11409
11410 if (rm == rn)
11411 HALT_UNALLOC;
11412
11413 offset <<= 4;
11414
11415 if (wb != Post)
11416 address += offset;
11417
11418 aarch64_get_mem_long_double (cpu, address, & a);
11419 aarch64_set_FP_long_double (cpu, rm, a);
11420 aarch64_get_mem_long_double (cpu, address + 16, & a);
11421 aarch64_set_FP_long_double (cpu, rn, a);
11422
11423 if (wb == Post)
11424 address += offset;
11425
11426 if (wb != NoWriteBack)
11427 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11428 }
11429
11430 static void
11431 dex_load_store_pair_fp (sim_cpu *cpu)
11432 {
11433 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11434 instr[29,25] = instruction encoding
11435 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11436 instr[22] = load/store (1=> load)
11437 instr[21,15] = signed, scaled, offset
11438 instr[14,10] = Rn
11439 instr[ 9, 5] = Rd
11440 instr[ 4, 0] = Rm */
11441
11442 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11443 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11444
11445 switch (dispatch)
11446 {
11447 case 2: store_pair_float (cpu, offset, Post); return;
11448 case 3: load_pair_float (cpu, offset, Post); return;
11449 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11450 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11451 case 6: store_pair_float (cpu, offset, Pre); return;
11452 case 7: load_pair_float (cpu, offset, Pre); return;
11453
11454 case 10: store_pair_double (cpu, offset, Post); return;
11455 case 11: load_pair_double (cpu, offset, Post); return;
11456 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11457 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11458 case 14: store_pair_double (cpu, offset, Pre); return;
11459 case 15: load_pair_double (cpu, offset, Pre); return;
11460
11461 case 18: store_pair_long_double (cpu, offset, Post); return;
11462 case 19: load_pair_long_double (cpu, offset, Post); return;
11463 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11464 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11465 case 22: store_pair_long_double (cpu, offset, Pre); return;
11466 case 23: load_pair_long_double (cpu, offset, Pre); return;
11467
11468 default:
11469 HALT_UNALLOC;
11470 }
11471 }
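
/* Worked example of the dispatch above (for reference):
   LDP D0, D1, [X0] has size = 01, addressing mode = 10 (offset) and
   L = 1, so dispatch = (1 << 3) | 0b101 = 13, selecting
   load_pair_double with NoWriteBack.  */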
11472
11473 static inline unsigned
11474 vec_reg (unsigned v, unsigned o)
11475 {
11476   return (v + o) & 0x1F;   /* Vector register numbers wrap modulo 32.  */
11477 }
11478
11479 /* Load multiple N-element structures to N consecutive registers. */
11480 static void
11481 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11482 {
11483 int all = INSTR (30, 30);
11484 unsigned size = INSTR (11, 10);
11485 unsigned vd = INSTR (4, 0);
11486 unsigned i;
11487
11488 switch (size)
11489 {
11490 case 0: /* 8-bit operations. */
11491 if (all)
11492 for (i = 0; i < (16 * N); i++)
11493 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11494 aarch64_get_mem_u8 (cpu, address + i));
11495 else
11496 for (i = 0; i < (8 * N); i++)
11497 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11498 aarch64_get_mem_u8 (cpu, address + i));
11499 return;
11500
11501 case 1: /* 16-bit operations. */
11502 if (all)
11503 for (i = 0; i < (8 * N); i++)
11504 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11505 aarch64_get_mem_u16 (cpu, address + i * 2));
11506 else
11507 for (i = 0; i < (4 * N); i++)
11508 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11509 aarch64_get_mem_u16 (cpu, address + i * 2));
11510 return;
11511
11512 case 2: /* 32-bit operations. */
11513 if (all)
11514 for (i = 0; i < (4 * N); i++)
11515 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11516 aarch64_get_mem_u32 (cpu, address + i * 4));
11517 else
11518 for (i = 0; i < (2 * N); i++)
11519 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11520 aarch64_get_mem_u32 (cpu, address + i * 4));
11521 return;
11522
11523 case 3: /* 64-bit operations. */
11524 if (all)
11525 for (i = 0; i < (2 * N); i++)
11526 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11527 aarch64_get_mem_u64 (cpu, address + i * 8));
11528 else
11529 for (i = 0; i < N; i++)
11530 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11531 aarch64_get_mem_u64 (cpu, address + i * 8));
11532 return;
11533 }
11534 }
11535
11536 /* LD4: load multiple 4-element to four consecutive registers. */
11537 static void
11538 LD4 (sim_cpu *cpu, uint64_t address)
11539 {
11540 vec_load (cpu, address, 4);
11541 }
11542
11543 /* LD3: load multiple 3-element structures to three consecutive registers. */
11544 static void
11545 LD3 (sim_cpu *cpu, uint64_t address)
11546 {
11547 vec_load (cpu, address, 3);
11548 }
11549
11550 /* LD2: load multiple 2-element structures to two consecutive registers. */
11551 static void
11552 LD2 (sim_cpu *cpu, uint64_t address)
11553 {
11554 vec_load (cpu, address, 2);
11555 }
11556
11557 /* Load multiple 1-element structures into one register. */
11558 static void
11559 LD1_1 (sim_cpu *cpu, uint64_t address)
11560 {
11561 int all = INSTR (30, 30);
11562 unsigned size = INSTR (11, 10);
11563 unsigned vd = INSTR (4, 0);
11564 unsigned i;
11565
11566 switch (size)
11567 {
11568 case 0:
11569 /* LD1 {Vd.16b}, addr, #16 */
11570 /* LD1 {Vd.8b}, addr, #8 */
11571 for (i = 0; i < (all ? 16 : 8); i++)
11572 aarch64_set_vec_u8 (cpu, vd, i,
11573 aarch64_get_mem_u8 (cpu, address + i));
11574 return;
11575
11576 case 1:
11577 /* LD1 {Vd.8h}, addr, #16 */
11578 /* LD1 {Vd.4h}, addr, #8 */
11579 for (i = 0; i < (all ? 8 : 4); i++)
11580 aarch64_set_vec_u16 (cpu, vd, i,
11581 aarch64_get_mem_u16 (cpu, address + i * 2));
11582 return;
11583
11584 case 2:
11585 /* LD1 {Vd.4s}, addr, #16 */
11586 /* LD1 {Vd.2s}, addr, #8 */
11587 for (i = 0; i < (all ? 4 : 2); i++)
11588 aarch64_set_vec_u32 (cpu, vd, i,
11589 aarch64_get_mem_u32 (cpu, address + i * 4));
11590 return;
11591
11592 case 3:
11593 /* LD1 {Vd.2d}, addr, #16 */
11594 /* LD1 {Vd.1d}, addr, #8 */
11595 for (i = 0; i < (all ? 2 : 1); i++)
11596 aarch64_set_vec_u64 (cpu, vd, i,
11597 aarch64_get_mem_u64 (cpu, address + i * 8));
11598 return;
11599 }
11600 }
11601
11602 /* Load multiple 1-element structures into two registers. */
11603 static void
11604 LD1_2 (sim_cpu *cpu, uint64_t address)
11605 {
11606   /* FIXME: This is the same code as the LD2 version, but it should not
11607      be: LD2 de-interleaves element pairs across the two registers while
11608      LD1 keeps them consecutive; vec_load omits that interleaving. */
11609 vec_load (cpu, address, 2);
11610 }
11611
11612 /* Load multiple 1-element structures into three registers. */
11613 static void
11614 LD1_3 (sim_cpu *cpu, uint64_t address)
11615 {
11616   /* FIXME: This is the same code as the LD3 version, but it should not
11617      be: LD3 de-interleaves 3-element structures across three registers
11618      while LD1 keeps elements consecutive; the interleaving is missing. */
11619 vec_load (cpu, address, 3);
11620 }
11621
11622 /* Load multiple 1-element structures into four registers. */
11623 static void
11624 LD1_4 (sim_cpu *cpu, uint64_t address)
11625 {
11626   /* FIXME: This is the same code as the LD4 version, but it should not
11627      be: LD4 de-interleaves 4-element structures across four registers
11628      while LD1 keeps elements consecutive; the interleaving is missing. */
11629 vec_load (cpu, address, 4);
11630 }
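
/* For reference: the FIXMEs above exist because the architecture
   de-interleaves LDn across the registers, whereas vec_load fills them
   consecutively.  A sketch of what an interleaving LD2 would look like
   for 8-bit lanes (illustrative only, not used by the simulator):  */

static void
ld2_interleaved_sketch (sim_cpu *cpu, uint64_t address)
{
  int all = INSTR (30, 30);
  unsigned vd = INSTR (4, 0);
  unsigned i;

  for (i = 0; i < (all ? 16u : 8u); i++)
    {
      /* Structure element 0 goes to lane i of Vd,
	 structure element 1 to lane i of V(d+1).  */
      aarch64_set_vec_u8 (cpu, vec_reg (vd, 0), i,
			  aarch64_get_mem_u8 (cpu, address + i * 2));
      aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i,
			  aarch64_get_mem_u8 (cpu, address + i * 2 + 1));
    }
}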
11631
11632 /* Store multiple N-element structures to N consecutive registers. */
11633 static void
11634 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11635 {
11636 int all = INSTR (30, 30);
11637 unsigned size = INSTR (11, 10);
11638 unsigned vd = INSTR (4, 0);
11639 unsigned i;
11640
11641 switch (size)
11642 {
11643 case 0: /* 8-bit operations. */
11644 if (all)
11645 for (i = 0; i < (16 * N); i++)
11646 aarch64_set_mem_u8
11647 (cpu, address + i,
11648 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11649 else
11650 for (i = 0; i < (8 * N); i++)
11651 aarch64_set_mem_u8
11652 (cpu, address + i,
11653 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11654 return;
11655
11656 case 1: /* 16-bit operations. */
11657 if (all)
11658 for (i = 0; i < (8 * N); i++)
11659 aarch64_set_mem_u16
11660 (cpu, address + i * 2,
11661 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11662 else
11663 for (i = 0; i < (4 * N); i++)
11664 aarch64_set_mem_u16
11665 (cpu, address + i * 2,
11666 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11667 return;
11668
11669 case 2: /* 32-bit operations. */
11670 if (all)
11671 for (i = 0; i < (4 * N); i++)
11672 aarch64_set_mem_u32
11673 (cpu, address + i * 4,
11674 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11675 else
11676 for (i = 0; i < (2 * N); i++)
11677 aarch64_set_mem_u32
11678 (cpu, address + i * 4,
11679 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11680 return;
11681
11682 case 3: /* 64-bit operations. */
11683 if (all)
11684 for (i = 0; i < (2 * N); i++)
11685 aarch64_set_mem_u64
11686 (cpu, address + i * 8,
11687 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11688 else
11689 for (i = 0; i < N; i++)
11690 aarch64_set_mem_u64
11691 (cpu, address + i * 8,
11692 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11693 return;
11694 }
11695 }
11696
11697 /* Store multiple 4-element structure to four consecutive registers. */
11698 static void
11699 ST4 (sim_cpu *cpu, uint64_t address)
11700 {
11701 vec_store (cpu, address, 4);
11702 }
11703
11704 /* Store multiple 3-element structures to three consecutive registers. */
11705 static void
11706 ST3 (sim_cpu *cpu, uint64_t address)
11707 {
11708 vec_store (cpu, address, 3);
11709 }
11710
11711 /* Store multiple 2-element structures to two consecutive registers. */
11712 static void
11713 ST2 (sim_cpu *cpu, uint64_t address)
11714 {
11715 vec_store (cpu, address, 2);
11716 }
11717
11718 /* Store multiple 1-element structures into one register. */
11719 static void
11720 ST1_1 (sim_cpu *cpu, uint64_t address)
11721 {
11722 int all = INSTR (30, 30);
11723 unsigned size = INSTR (11, 10);
11724 unsigned vd = INSTR (4, 0);
11725 unsigned i;
11726
11727 switch (size)
11728 {
11729 case 0:
11730 for (i = 0; i < (all ? 16 : 8); i++)
11731 aarch64_set_mem_u8 (cpu, address + i,
11732 aarch64_get_vec_u8 (cpu, vd, i));
11733 return;
11734
11735 case 1:
11736 for (i = 0; i < (all ? 8 : 4); i++)
11737 aarch64_set_mem_u16 (cpu, address + i * 2,
11738 aarch64_get_vec_u16 (cpu, vd, i));
11739 return;
11740
11741 case 2:
11742 for (i = 0; i < (all ? 4 : 2); i++)
11743 aarch64_set_mem_u32 (cpu, address + i * 4,
11744 aarch64_get_vec_u32 (cpu, vd, i));
11745 return;
11746
11747 case 3:
11748 for (i = 0; i < (all ? 2 : 1); i++)
11749 aarch64_set_mem_u64 (cpu, address + i * 8,
11750 aarch64_get_vec_u64 (cpu, vd, i));
11751 return;
11752 }
11753 }
11754
11755 /* Store multiple 1-element structures into two registers. */
11756 static void
11757 ST1_2 (sim_cpu *cpu, uint64_t address)
11758 {
11759   /* FIXME: This is the same code as the ST2 version, but it should not
11760      be: ST2 interleaves element pairs from the two registers while ST1
11761      stores them consecutively; vec_store omits that interleaving. */
11762 vec_store (cpu, address, 2);
11763 }
11764
11765 /* Store multiple 1-element structures into three registers. */
11766 static void
11767 ST1_3 (sim_cpu *cpu, uint64_t address)
11768 {
11769   /* FIXME: This is the same code as the ST3 version, but it should not
11770      be: ST3 interleaves 3-element structures from three registers while
11771      ST1 stores elements consecutively; the interleaving is missing. */
11772 vec_store (cpu, address, 3);
11773 }
11774
11775 /* Store multiple 1-element structures into four registers. */
11776 static void
11777 ST1_4 (sim_cpu *cpu, uint64_t address)
11778 {
11779   /* FIXME: This is the same code as the ST4 version, but it should not
11780      be: ST4 interleaves 4-element structures from four registers while
11781      ST1 stores elements consecutively; the interleaving is missing. */
11782 vec_store (cpu, address, 4);
11783 }
11784
11785 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11786 do \
11787 { \
11788 switch (INSTR (15, 14)) \
11789 { \
11790 case 0: \
11791 lane = (full << 3) | (s << 2) | size; \
11792 size = 0; \
11793 break; \
11794 \
11795 case 1: \
11796 if ((size & 1) == 1) \
11797 HALT_UNALLOC; \
11798 lane = (full << 2) | (s << 1) | (size >> 1); \
11799 size = 1; \
11800 break; \
11801 \
11802 case 2: \
11803 if ((size & 2) == 2) \
11804 HALT_UNALLOC; \
11805 \
11806 if ((size & 1) == 0) \
11807 { \
11808 lane = (full << 1) | s; \
11809 size = 2; \
11810 } \
11811 else \
11812 { \
11813 if (s) \
11814 HALT_UNALLOC; \
11815 lane = full; \
11816 size = 3; \
11817 } \
11818 break; \
11819 \
11820 default: \
11821 HALT_UNALLOC; \
11822 } \
11823 } \
11824 while (0)
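
/* Worked examples of the decode above (for reference; Q = instr[30],
   S = instr[12], size = instr[11,10]):
     opcode 00x : byte lanes,   lane = Q:S:size    (0..15), size -> 0
     opcode 01x : half lanes,   lane = Q:S:size<1> (0..7),  size -> 1
     opcode 10x : word lanes   (size<0> == 0), lane = Q:S (0..3), size -> 2
                  double lanes (size == 01, S == 0), lane = Q (0..1), size -> 3  */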
11825
11826 /* Load single structure into one lane of N registers. */
11827 static void
11828 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11829 {
11830 /* instr[31] = 0
11831 instr[30] = element selector 0=>half, 1=>all elements
11832 instr[29,24] = 00 1101
11833 instr[23] = 0=>simple, 1=>post
11834 instr[22] = 1
11835 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11836 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11837 11111 (immediate post inc)
11838 instr[15,13] = opcode
11839 instr[12] = S, used for lane number
11840 instr[11,10] = size, also used for lane number
11841 instr[9,5] = address
11842 instr[4,0] = Vd */
11843
11844 unsigned full = INSTR (30, 30);
11845 unsigned vd = INSTR (4, 0);
11846 unsigned size = INSTR (11, 10);
11847 unsigned s = INSTR (12, 12);
11848 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11849 int lane = 0;
11850 int i;
11851
11852 NYI_assert (29, 24, 0x0D);
11853 NYI_assert (22, 22, 1);
11854
11855 /* Compute the lane number first (using size), and then compute size. */
11856 LDn_STn_SINGLE_LANE_AND_SIZE ();
11857
11858 for (i = 0; i < nregs; i++)
11859 switch (size)
11860 {
11861 case 0:
11862 {
11863 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11864 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11865 break;
11866 }
11867
11868 case 1:
11869 {
11870 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11871 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11872 break;
11873 }
11874
11875 case 2:
11876 {
11877 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11878 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11879 break;
11880 }
11881
11882 case 3:
11883 {
11884 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11885 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11886 break;
11887 }
11888 }
11889 }
11890
11891 /* Store single structure from one lane from N registers. */
11892 static void
11893 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11894 {
11895 /* instr[31] = 0
11896 instr[30] = element selector 0=>half, 1=>all elements
11897 instr[29,24] = 00 1101
11898 instr[23] = 0=>simple, 1=>post
11899 instr[22] = 0
11900 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11901 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11902 11111 (immediate post inc)
11903 instr[15,13] = opcode
11904 instr[12] = S, used for lane number
11905 instr[11,10] = size, also used for lane number
11906 instr[9,5] = address
11907 instr[4,0] = Vd */
11908
11909 unsigned full = INSTR (30, 30);
11910 unsigned vd = INSTR (4, 0);
11911 unsigned size = INSTR (11, 10);
11912 unsigned s = INSTR (12, 12);
11913 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11914 int lane = 0;
11915 int i;
11916
11917 NYI_assert (29, 24, 0x0D);
11918 NYI_assert (22, 22, 0);
11919
11920 /* Compute the lane number first (using size), and then compute size. */
11921 LDn_STn_SINGLE_LANE_AND_SIZE ();
11922
11923 for (i = 0; i < nregs; i++)
11924 switch (size)
11925 {
11926 case 0:
11927 {
11928 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11929 aarch64_set_mem_u8 (cpu, address + i, val);
11930 break;
11931 }
11932
11933 case 1:
11934 {
11935 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11936 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11937 break;
11938 }
11939
11940 case 2:
11941 {
11942 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11943 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11944 break;
11945 }
11946
11947 case 3:
11948 {
11949 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11950 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11951 break;
11952 }
11953 }
11954 }
11955
11956 /* Load single structure into all lanes of N registers. */
11957 static void
11958 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11959 {
11960 /* instr[31] = 0
11961 instr[30] = element selector 0=>half, 1=>all elements
11962 instr[29,24] = 00 1101
11963 instr[23] = 0=>simple, 1=>post
11964 instr[22] = 1
11965 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11966 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11967 11111 (immediate post inc)
11968 instr[15,14] = 11
11969 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11970 instr[12] = 0
11971 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11972 10=> word(s), 11=> double(d)
11973 instr[9,5] = address
11974 instr[4,0] = Vd */
11975
11976 unsigned full = INSTR (30, 30);
11977 unsigned vd = INSTR (4, 0);
11978 unsigned size = INSTR (11, 10);
11979 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11980 int i, n;
11981
11982 NYI_assert (29, 24, 0x0D);
11983 NYI_assert (22, 22, 1);
11984 NYI_assert (15, 14, 3);
11985 NYI_assert (12, 12, 0);
11986
11987 for (n = 0; n < nregs; n++)
11988 switch (size)
11989 {
11990 case 0:
11991 {
11992 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11993 for (i = 0; i < (full ? 16 : 8); i++)
11994 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11995 break;
11996 }
11997
11998 case 1:
11999 {
12000 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
12001 for (i = 0; i < (full ? 8 : 4); i++)
12002 aarch64_set_vec_u16 (cpu, vd + n, i, val);
12003 break;
12004 }
12005
12006 case 2:
12007 {
12008 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
12009 for (i = 0; i < (full ? 4 : 2); i++)
12010 aarch64_set_vec_u32 (cpu, vd + n, i, val);
12011 break;
12012 }
12013
12014 case 3:
12015 {
12016 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
12017 for (i = 0; i < (full ? 2 : 1); i++)
12018 aarch64_set_vec_u64 (cpu, vd + n, i, val);
12019 break;
12020 }
12021
12022 default:
12023 HALT_UNALLOC;
12024 }
12025 }
12026
12027 static void
12028 do_vec_load_store (sim_cpu *cpu)
12029 {
12030 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
12031
12032 instr[31] = 0
12033 instr[30] = element selector 0=>half, 1=>all elements
12034 instr[29,25] = 00110
12035 instr[24] = 0=>multiple struct, 1=>single struct
12036 instr[23] = 0=>simple, 1=>post
12037 instr[22] = 0=>store, 1=>load
12038 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12039 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12040 11111 (immediate post inc)
12041 instr[15,12] = elements and destinations. eg for load:
12042 0000=>LD4 => load multiple 4-element to
12043 four consecutive registers
12044 0100=>LD3 => load multiple 3-element to
12045 three consecutive registers
12046 1000=>LD2 => load multiple 2-element to
12047 two consecutive registers
12048 0010=>LD1 => load multiple 1-element to
12049 four consecutive registers
12050 0110=>LD1 => load multiple 1-element to
12051 three consecutive registers
12052 1010=>LD1 => load multiple 1-element to
12053 two consecutive registers
12054 0111=>LD1 => load multiple 1-element to
12055 one register
12056                          1100=>LD1R,LD2R
12057                          1110=>LD3R,LD4R
12058 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12059 10=> word(s), 11=> double(d)
12060 instr[9,5] = Vn, can be SP
12061 instr[4,0] = Vd */
12062
12063 int single;
12064 int post;
12065 int load;
12066 unsigned vn;
12067 uint64_t address;
12068 int type;
12069
12070 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12071 HALT_NYI;
12072
12073 single = INSTR (24, 24);
12074 post = INSTR (23, 23);
12075 load = INSTR (22, 22);
12076 type = INSTR (15, 12);
12077 vn = INSTR (9, 5);
12078 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12079
12080 if (! single && INSTR (21, 21) != 0)
12081 HALT_UNALLOC;
12082
12083 if (post)
12084 {
12085 unsigned vm = INSTR (20, 16);
12086
12087 if (vm == R31)
12088 {
12089 unsigned sizeof_operation;
12090
12091 if (single)
12092 {
12093 if ((type >= 0) && (type <= 11))
12094 {
12095 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12096 switch (INSTR (15, 14))
12097 {
12098 case 0:
12099 sizeof_operation = nregs * 1;
12100 break;
12101 case 1:
12102 sizeof_operation = nregs * 2;
12103 break;
12104 case 2:
12105 if (INSTR (10, 10) == 0)
12106 sizeof_operation = nregs * 4;
12107 else
12108 sizeof_operation = nregs * 8;
12109 break;
12110 default:
12111 HALT_UNALLOC;
12112 }
12113 }
12114 else if (type == 0xC)
12115 {
12116 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12117 sizeof_operation <<= INSTR (11, 10);
12118 }
12119 else if (type == 0xE)
12120 {
12121 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12122 sizeof_operation <<= INSTR (11, 10);
12123 }
12124 else
12125 HALT_UNALLOC;
12126 }
12127 else
12128 {
12129 switch (type)
12130 {
12131 case 0: sizeof_operation = 32; break;
12132 case 4: sizeof_operation = 24; break;
12133 case 8: sizeof_operation = 16; break;
12134
12135 case 7:
12136 /* One register, immediate offset variant. */
12137 sizeof_operation = 8;
12138 break;
12139
12140 case 10:
12141 /* Two registers, immediate offset variant. */
12142 sizeof_operation = 16;
12143 break;
12144
12145 case 6:
12146 /* Three registers, immediate offset variant. */
12147 sizeof_operation = 24;
12148 break;
12149
12150 case 2:
12151 /* Four registers, immediate offset variant. */
12152 sizeof_operation = 32;
12153 break;
12154
12155 default:
12156 HALT_UNALLOC;
12157 }
12158
12159 if (INSTR (30, 30))
12160 sizeof_operation *= 2;
12161 }
12162
12163 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12164 }
12165 else
12166 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12167 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12168 }
12169 else
12170 {
12171 NYI_assert (20, 16, 0);
12172 }
12173
12174 if (single)
12175 {
12176 if (load)
12177 {
12178 if ((type >= 0) && (type <= 11))
12179 do_vec_LDn_single (cpu, address);
12180 else if ((type == 0xC) || (type == 0xE))
12181 do_vec_LDnR (cpu, address);
12182 else
12183 HALT_UNALLOC;
12184 return;
12185 }
12186
12187 /* Stores. */
12188 if ((type >= 0) && (type <= 11))
12189 {
12190 do_vec_STn_single (cpu, address);
12191 return;
12192 }
12193
12194 HALT_UNALLOC;
12195 }
12196
12197 if (load)
12198 {
12199 switch (type)
12200 {
12201 case 0: LD4 (cpu, address); return;
12202 case 4: LD3 (cpu, address); return;
12203 case 8: LD2 (cpu, address); return;
12204 case 2: LD1_4 (cpu, address); return;
12205 case 6: LD1_3 (cpu, address); return;
12206 case 10: LD1_2 (cpu, address); return;
12207 case 7: LD1_1 (cpu, address); return;
12208
12209 default:
12210 HALT_UNALLOC;
12211 }
12212 }
12213
12214 /* Stores. */
12215 switch (type)
12216 {
12217 case 0: ST4 (cpu, address); return;
12218 case 4: ST3 (cpu, address); return;
12219 case 8: ST2 (cpu, address); return;
12220 case 2: ST1_4 (cpu, address); return;
12221 case 6: ST1_3 (cpu, address); return;
12222 case 10: ST1_2 (cpu, address); return;
12223 case 7: ST1_1 (cpu, address); return;
12224 default:
12225 HALT_UNALLOC;
12226 }
12227 }
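
/* Worked example of the post-increment sizing above (for reference):
   LD1 {V0.16B}, [X0], #16 is type 0111 (one register, immediate
   offset), so sizeof_operation = 8, doubled to 16 because instr[30]
   selects the full 128-bit register.  */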
12228
12229 static void
12230 dexLdSt (sim_cpu *cpu)
12231 {
12232 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12233 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12234 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12235 bits [29,28:26] of a LS are the secondary dispatch vector. */
12236 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12237
12238 switch (group2)
12239 {
12240 case LS_EXCL_000:
12241 dexLoadExclusive (cpu); return;
12242
12243 case LS_LIT_010:
12244 case LS_LIT_011:
12245 dexLoadLiteral (cpu); return;
12246
12247 case LS_OTHER_110:
12248 case LS_OTHER_111:
12249 dexLoadOther (cpu); return;
12250
12251 case LS_ADVSIMD_001:
12252 do_vec_load_store (cpu); return;
12253
12254 case LS_PAIR_100:
12255 dex_load_store_pair_gr (cpu); return;
12256
12257 case LS_PAIR_101:
12258 dex_load_store_pair_fp (cpu); return;
12259
12260 default:
12261 /* Should never reach here. */
12262 HALT_NYI;
12263 }
12264 }
12265
12266 /* Specific decode and execute for group Data Processing Register. */
12267
12268 static void
12269 dexLogicalShiftedRegister (sim_cpu *cpu)
12270 {
12271 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12272 instr[30,29] = op
12273 instr[28:24] = 01010
12274 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12275 instr[21] = N
12276 instr[20,16] = Rm
12277 instr[15,10] = count : must be 0xxxxx for 32 bit
12278 instr[9,5] = Rn
12279 instr[4,0] = Rd */
12280
12281 uint32_t size = INSTR (31, 31);
12282 Shift shiftType = INSTR (23, 22);
12283 uint32_t count = INSTR (15, 10);
12284
12285   /* 32 bit operations must have count[5] = 0
12286      or else we have an UNALLOC. */
12287 if (size == 0 && uimm (count, 5, 5))
12288 HALT_UNALLOC;
12289
12290 /* Dispatch on size:op:N. */
12291 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12292 {
12293 case 0: and32_shift (cpu, shiftType, count); return;
12294 case 1: bic32_shift (cpu, shiftType, count); return;
12295 case 2: orr32_shift (cpu, shiftType, count); return;
12296 case 3: orn32_shift (cpu, shiftType, count); return;
12297 case 4: eor32_shift (cpu, shiftType, count); return;
12298 case 5: eon32_shift (cpu, shiftType, count); return;
12299 case 6: ands32_shift (cpu, shiftType, count); return;
12300 case 7: bics32_shift (cpu, shiftType, count); return;
12301 case 8: and64_shift (cpu, shiftType, count); return;
12302 case 9: bic64_shift (cpu, shiftType, count); return;
12303     case 10: orr64_shift  (cpu, shiftType, count); return;
12304     case 11: orn64_shift  (cpu, shiftType, count); return;
12305     case 12: eor64_shift  (cpu, shiftType, count); return;
12306     case 13: eon64_shift  (cpu, shiftType, count); return;
12307     case 14: ands64_shift (cpu, shiftType, count); return;
12308     case 15: bics64_shift (cpu, shiftType, count); return;
12309 }
12310 }
12311
12312 /* 32 bit conditional select. */
12313 static void
12314 csel32 (sim_cpu *cpu, CondCode cc)
12315 {
12316 unsigned rm = INSTR (20, 16);
12317 unsigned rn = INSTR (9, 5);
12318 unsigned rd = INSTR (4, 0);
12319
12320 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12321 testConditionCode (cpu, cc)
12322 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12323 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12324 }
12325
12326 /* 64 bit conditional select. */
12327 static void
12328 csel64 (sim_cpu *cpu, CondCode cc)
12329 {
12330 unsigned rm = INSTR (20, 16);
12331 unsigned rn = INSTR (9, 5);
12332 unsigned rd = INSTR (4, 0);
12333
12334 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12335 testConditionCode (cpu, cc)
12336 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12337 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12338 }
12339
12340 /* 32 bit conditional increment. */
12341 static void
12342 csinc32 (sim_cpu *cpu, CondCode cc)
12343 {
12344 unsigned rm = INSTR (20, 16);
12345 unsigned rn = INSTR (9, 5);
12346 unsigned rd = INSTR (4, 0);
12347
12348 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12349 testConditionCode (cpu, cc)
12350 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12351 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12352 }
12353
12354 /* 64 bit conditional increment. */
12355 static void
12356 csinc64 (sim_cpu *cpu, CondCode cc)
12357 {
12358 unsigned rm = INSTR (20, 16);
12359 unsigned rn = INSTR (9, 5);
12360 unsigned rd = INSTR (4, 0);
12361
12362 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12363 testConditionCode (cpu, cc)
12364 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12365 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12366 }
12367
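/* N.B. the conditional select family also backs the architected
   aliases CSET, CSETM, CINC, CINV and CNEG; e.g. CSET Wd, cc is
   just CSINC Wd, WZR, WZR with the condition inverted, so the
   CSEL/CSINC/CSINV/CSNEG cases decoded in dexCondSelect below
   cover those aliases for free.  */
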
12368 /* 32 bit conditional invert. */
12369 static void
12370 csinv32 (sim_cpu *cpu, CondCode cc)
12371 {
12372 unsigned rm = INSTR (20, 16);
12373 unsigned rn = INSTR (9, 5);
12374 unsigned rd = INSTR (4, 0);
12375
12376 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12377 testConditionCode (cpu, cc)
12378 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12379 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12380 }
12381
12382 /* 64 bit conditional invert. */
12383 static void
12384 csinv64 (sim_cpu *cpu, CondCode cc)
12385 {
12386 unsigned rm = INSTR (20, 16);
12387 unsigned rn = INSTR (9, 5);
12388 unsigned rd = INSTR (4, 0);
12389
12390 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12391 testConditionCode (cpu, cc)
12392 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12393 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12394 }
12395
12396 /* 32 bit conditional negate. */
12397 static void
12398 csneg32 (sim_cpu *cpu, CondCode cc)
12399 {
12400 unsigned rm = INSTR (20, 16);
12401 unsigned rn = INSTR (9, 5);
12402 unsigned rd = INSTR (4, 0);
12403
12404 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12405 testConditionCode (cpu, cc)
12406 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12407 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12408 }
12409
12410 /* 64 bit conditional negate. */
12411 static void
12412 csneg64 (sim_cpu *cpu, CondCode cc)
12413 {
12414 unsigned rm = INSTR (20, 16);
12415 unsigned rn = INSTR (9, 5);
12416 unsigned rd = INSTR (4, 0);
12417
12418 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12419 testConditionCode (cpu, cc)
12420 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12421 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12422 }
12423
12424 static void
12425 dexCondSelect (sim_cpu *cpu)
12426 {
12427 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12428 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12429 100 ==> CSINV, 101 ==> CSNEG,
12430 _1_ ==> UNALLOC
12431 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12432 instr[28,21] = 11010100
12433 instr[20,16] = Rm
12434 instr[15,12] = cond */
12435
12436 CondCode cc = INSTR (15, 12);
12437 uint32_t S = INSTR (29, 29);
12438 uint32_t op2 = INSTR (11, 10);
12439
12440 if (S == 1)
12441 HALT_UNALLOC;
12442
12443 if (op2 & 0x2)
12444 HALT_UNALLOC;
12445
12446 switch ((INSTR (31, 30) << 1) | op2)
12447 {
12448 case 0: csel32 (cpu, cc); return;
12449 case 1: csinc32 (cpu, cc); return;
12450 case 2: csinv32 (cpu, cc); return;
12451 case 3: csneg32 (cpu, cc); return;
12452 case 4: csel64 (cpu, cc); return;
12453 case 5: csinc64 (cpu, cc); return;
12454 case 6: csinv64 (cpu, cc); return;
12455 case 7: csneg64 (cpu, cc); return;
12456 }
12457 }
12458
12459 /* Some helpers for counting leading 1 or 0 bits. */
12460
12461 /* Counts the number of leading bits which are the same
12462 in a 32 bit value; the result is in the range 1 to 32.  */
12463 static uint32_t
12464 leading32 (uint32_t value)
12465 {
12466 int32_t mask = 0xffff0000;
12467 uint32_t count = 16; /* Counts number of bits set in mask. */
12468 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12469 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12470
12471 while (lo + 1 < hi)
12472 {
12473 int32_t test = (value & mask);
12474
12475 if (test == 0 || test == mask)
12476 {
12477 lo = count;
12478 count = (lo + hi) / 2;
12479 mask >>= (count - lo);
12480 }
12481 else
12482 {
12483 hi = count;
12484 count = (lo + hi) / 2;
12485 mask <<= hi - count;
12486 }
12487 }
12488
12489 if (lo != hi)
12490 {
12491 int32_t test;
12492
12493 mask >>= 1;
12494 test = (value & mask);
12495
12496 if (test == 0 || test == mask)
12497 count = hi;
12498 else
12499 count = lo;
12500 }
12501
12502 return count;
12503 }
12504
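/* As an informal example of the binary search above: for the value
   0x0000ffff the run of identical leading bits is the 16 zeros, so
   leading32 returns 16, giving a CLZ result of 16 and (after the
   caller subtracts the leading bit itself) a CLS result of 15.  */
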
12505 /* Counts the number of leading bits which are the same
12506 in a 64 bit value; the result is in the range 1 to 64.  */
12507 static uint64_t
12508 leading64 (uint64_t value)
12509 {
12510 int64_t mask = 0xffffffff00000000LL;
12511 uint64_t count = 32; /* Counts number of bits set in mask. */
12512 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12513 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12514
12515 while (lo + 1 < hi)
12516 {
12517 int64_t test = (value & mask);
12518
12519 if (test == 0 || test == mask)
12520 {
12521 lo = count;
12522 count = (lo + hi) / 2;
12523 mask >>= (count - lo);
12524 }
12525 else
12526 {
12527 hi = count;
12528 count = (lo + hi) / 2;
12529 mask <<= hi - count;
12530 }
12531 }
12532
12533 if (lo != hi)
12534 {
12535 int64_t test;
12536
12537 mask >>= 1;
12538 test = (value & mask);
12539
12540 if (test == 0 || test == mask)
12541 count = hi;
12542 else
12543 count = lo;
12544 }
12545
12546 return count;
12547 }
12548
12549 /* Bit operations. */
12550 /* N.B. register args may not be SP.  */
12551
12552 /* 32 bit count leading sign bits. */
12553 static void
12554 cls32 (sim_cpu *cpu)
12555 {
12556 unsigned rn = INSTR (9, 5);
12557 unsigned rd = INSTR (4, 0);
12558
12559 /* N.B. the result needs to exclude the leading bit. */
12560 aarch64_set_reg_u64
12561 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12562 }
12563
12564 /* 64 bit count leading sign bits. */
12565 static void
12566 cls64 (sim_cpu *cpu)
12567 {
12568 unsigned rn = INSTR (9, 5);
12569 unsigned rd = INSTR (4, 0);
12570
12571 /* N.B. the result needs to exclude the leading bit. */
12572 aarch64_set_reg_u64
12573 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12574 }
12575
12576 /* 32 bit count leading zero bits. */
12577 static void
12578 clz32 (sim_cpu *cpu)
12579 {
12580 unsigned rn = INSTR (9, 5);
12581 unsigned rd = INSTR (4, 0);
12582 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12583
12584 /* if the sign (top) bit is set then the count is 0. */
12585 if (pick32 (value, 31, 31))
12586 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12587 else
12588 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12589 }
12590
12591 /* 64 bit count leading zero bits. */
12592 static void
12593 clz64 (sim_cpu *cpu)
12594 {
12595 unsigned rn = INSTR (9, 5);
12596 unsigned rd = INSTR (4, 0);
12597 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12598
12599 /* if the sign (top) bit is set then the count is 0. */
12600 if (pick64 (value, 63, 63))
12601 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12602 else
12603 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12604 }
12605
12606 /* 32 bit reverse bits. */
12607 static void
12608 rbit32 (sim_cpu *cpu)
12609 {
12610 unsigned rn = INSTR (9, 5);
12611 unsigned rd = INSTR (4, 0);
12612 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12613 uint32_t result = 0;
12614 int i;
12615
12616 for (i = 0; i < 32; i++)
12617 {
12618 result <<= 1;
12619 result |= (value & 1);
12620 value >>= 1;
12621 }
12622 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12623 }
12624
12625 /* 64 bit reverse bits. */
12626 static void
12627 rbit64 (sim_cpu *cpu)
12628 {
12629 unsigned rn = INSTR (9, 5);
12630 unsigned rd = INSTR (4, 0);
12631 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12632 uint64_t result = 0;
12633 int i;
12634
12635 for (i = 0; i < 64; i++)
12636 {
12637 result <<= 1;
12638 result |= (value & 1UL);
12639 value >>= 1;
12640 }
12641 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12642 }
12643
12644 /* 32 bit reverse bytes. */
12645 static void
12646 rev32 (sim_cpu *cpu)
12647 {
12648 unsigned rn = INSTR (9, 5);
12649 unsigned rd = INSTR (4, 0);
12650 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12651 uint32_t result = 0;
12652 int i;
12653
12654 for (i = 0; i < 4; i++)
12655 {
12656 result <<= 8;
12657 result |= (value & 0xff);
12658 value >>= 8;
12659 }
12660 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12661 }
12662
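/* 64 bit reverse bytes within each word.  This is the 64 bit form
   of REV32: a minimal sketch added here because reusing rev32 for
   dispatch case 10 below would zero the upper word instead of
   byte-swapping it.  */
static void
rev32_64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  /* Accumulate one byte of each 32 bit word per iteration, in the
     same masked-shift style as revh64 below, so bytes never cross
     their 32 bit lane.  */
  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
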
12663 /* 64 bit reverse bytes. */
12664 static void
12665 rev64 (sim_cpu *cpu)
12666 {
12667 unsigned rn = INSTR (9, 5);
12668 unsigned rd = INSTR (4, 0);
12669 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12670 uint64_t result = 0;
12671 int i;
12672
12673 for (i = 0; i < 8; i++)
12674 {
12675 result <<= 8;
12676 result |= (value & 0xffULL);
12677 value >>= 8;
12678 }
12679 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12680 }
12681
12682 /* 32 bit reverse shorts. */
12683 /* N.B. this reverses the order of the bytes in each half word.  */
12684 static void
12685 revh32 (sim_cpu *cpu)
12686 {
12687 unsigned rn = INSTR (9, 5);
12688 unsigned rd = INSTR (4, 0);
12689 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12690 uint32_t result = 0;
12691 int i;
12692
12693 for (i = 0; i < 2; i++)
12694 {
12695 result <<= 8;
12696 result |= (value & 0x00ff00ff);
12697 value >>= 8;
12698 }
12699 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12700 }
12701
12702 /* 64 bit reverse shorts. */
12703 /* N.B. this reverses the order of the bytes in each half word.  */
12704 static void
12705 revh64 (sim_cpu *cpu)
12706 {
12707 unsigned rn = INSTR (9, 5);
12708 unsigned rd = INSTR (4, 0);
12709 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12710 uint64_t result = 0;
12711 int i;
12712
12713 for (i = 0; i < 2; i++)
12714 {
12715 result <<= 8;
12716 result |= (value & 0x00ff00ff00ff00ffULL);
12717 value >>= 8;
12718 }
12719 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12720 }
12721
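/* For example revh32 maps 0xAABBCCDD to 0xBBAADDCC: the 0x00ff00ff
   mask lets each pass move one byte within its 16 bit lane without
   leaking bytes across lane boundaries.  */
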
12722 static void
12723 dexDataProc1Source (sim_cpu *cpu)
12724 {
12725 /* instr[30] = 1
12726 instr[28,21] = 11010110
12727 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12728 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12729 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12730 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12731 000010 ==> REV (REV32 for size == 1), 000011 ==> REV (64 bit) ow UNALLOC
12732 000100 ==> CLZ, 000101 ==> CLS
12733 ow ==> UNALLOC
12734 instr[9,5] = rn : may not be SP
12735 instr[4,0] = rd : may not be SP. */
12736
12737 uint32_t S = INSTR (29, 29);
12738 uint32_t opcode2 = INSTR (20, 16);
12739 uint32_t opcode = INSTR (15, 10);
12740 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12741
12742 if (S == 1)
12743 HALT_UNALLOC;
12744
12745 if (opcode2 != 0)
12746 HALT_UNALLOC;
12747
12748 if (opcode & 0x38)
12749 HALT_UNALLOC;
12750
12751 switch (dispatch)
12752 {
12753 case 0: rbit32 (cpu); return;
12754 case 1: revh32 (cpu); return;
12755 case 2: rev32 (cpu); return;
12756 case 4: clz32 (cpu); return;
12757 case 5: cls32 (cpu); return;
12758 case 8: rbit64 (cpu); return;
12759 case 9: revh64 (cpu); return;
12760 case 10: rev32_64 (cpu); return;
12761 case 11: rev64 (cpu); return;
12762 case 12: clz64 (cpu); return;
12763 case 13: cls64 (cpu); return;
12764 default: HALT_UNALLOC;
12765 }
12766 }
12767
12768 /* Variable shift.
12769 Shifts by count supplied in register.
12770 N.B. register args may not be SP.
12771 These all use the shifted auxiliary function for
12772 simplicity and clarity. Writing the actual shift
12773 inline would avoid a branch and so be faster but
12774 would also necessitate getting signs right. */
12775
12776 /* 32 bit arithmetic shift right. */
12777 static void
12778 asrv32 (sim_cpu *cpu)
12779 {
12780 unsigned rm = INSTR (20, 16);
12781 unsigned rn = INSTR (9, 5);
12782 unsigned rd = INSTR (4, 0);
12783
12784 aarch64_set_reg_u64
12785 (cpu, rd, NO_SP,
12786 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12787 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12788 }
12789
12790 /* 64 bit arithmetic shift right. */
12791 static void
12792 asrv64 (sim_cpu *cpu)
12793 {
12794 unsigned rm = INSTR (20, 16);
12795 unsigned rn = INSTR (9, 5);
12796 unsigned rd = INSTR (4, 0);
12797
12798 aarch64_set_reg_u64
12799 (cpu, rd, NO_SP,
12800 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12801 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12802 }
12803
12804 /* 32 bit logical shift left. */
12805 static void
12806 lslv32 (sim_cpu *cpu)
12807 {
12808 unsigned rm = INSTR (20, 16);
12809 unsigned rn = INSTR (9, 5);
12810 unsigned rd = INSTR (4, 0);
12811
12812 aarch64_set_reg_u64
12813 (cpu, rd, NO_SP,
12814 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12815 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12816 }
12817
12818 /* 64 bit logical shift left.  */
12819 static void
12820 lslv64 (sim_cpu *cpu)
12821 {
12822 unsigned rm = INSTR (20, 16);
12823 unsigned rn = INSTR (9, 5);
12824 unsigned rd = INSTR (4, 0);
12825
12826 aarch64_set_reg_u64
12827 (cpu, rd, NO_SP,
12828 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12829 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12830 }
12831
12832 /* 32 bit logical shift right. */
12833 static void
12834 lsrv32 (sim_cpu *cpu)
12835 {
12836 unsigned rm = INSTR (20, 16);
12837 unsigned rn = INSTR (9, 5);
12838 unsigned rd = INSTR (4, 0);
12839
12840 aarch64_set_reg_u64
12841 (cpu, rd, NO_SP,
12842 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12843 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12844 }
12845
12846 /* 64 bit logical shift right. */
12847 static void
12848 lsrv64 (sim_cpu *cpu)
12849 {
12850 unsigned rm = INSTR (20, 16);
12851 unsigned rn = INSTR (9, 5);
12852 unsigned rd = INSTR (4, 0);
12853
12854 aarch64_set_reg_u64
12855 (cpu, rd, NO_SP,
12856 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12857 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12858 }
12859
12860 /* 32 bit rotate right. */
12861 static void
12862 rorv32 (sim_cpu *cpu)
12863 {
12864 unsigned rm = INSTR (20, 16);
12865 unsigned rn = INSTR (9, 5);
12866 unsigned rd = INSTR (4, 0);
12867
12868 aarch64_set_reg_u64
12869 (cpu, rd, NO_SP,
12870 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12871 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12872 }
12873
12874 /* 64 bit rotate right. */
12875 static void
12876 rorv64 (sim_cpu *cpu)
12877 {
12878 unsigned rm = INSTR (20, 16);
12879 unsigned rn = INSTR (9, 5);
12880 unsigned rd = INSTR (4, 0);
12881
12882 aarch64_set_reg_u64
12883 (cpu, rd, NO_SP,
12884 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12885 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12886 }
12887
12888
12889 /* Divide.  */
12890
12891 /* 32 bit signed divide.  */
12892 static void
12893 sdiv32 (sim_cpu *cpu)
12894 {
12895 unsigned rm = INSTR (20, 16);
12896 unsigned rn = INSTR (9, 5);
12897 unsigned rd = INSTR (4, 0);
12898 /* N.B. the pseudo-code does the divide using 64 bit data. */
12899 /* TODO : check that this rounds towards zero as required. */
12900 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12901 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12902
12903 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12904 divisor ? ((int32_t) (dividend / divisor)) : 0);
12905 }
12906
12907 /* 64 bit signed divide. */
12908 static void
12909 sdiv64 (sim_cpu *cpu)
12910 {
12911 unsigned rm = INSTR (20, 16);
12912 unsigned rn = INSTR (9, 5);
12913 unsigned rd = INSTR (4, 0);
12914
12915 /* TODO : check that this rounds towards zero as required. */
12916 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12917
12918 aarch64_set_reg_s64
12919 (cpu, rd, NO_SP,
12920 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12921 }
12922
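/* One caveat worth flagging for the signed divides above (an
   observation, not something the original code states): AArch64
   defines SDIV of INT64_MIN by -1 as wrapping to INT64_MIN, but the
   host division performed here has undefined behaviour for exactly
   that operand pair and may trap.  The 32 bit case is done in 64 bit
   arithmetic before truncation, which yields the architected result
   on two's complement hosts.  */
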
12923 /* 32 bit unsigned divide. */
12924 static void
12925 udiv32 (sim_cpu *cpu)
12926 {
12927 unsigned rm = INSTR (20, 16);
12928 unsigned rn = INSTR (9, 5);
12929 unsigned rd = INSTR (4, 0);
12930
12931 /* N.B. the pseudo-code does the divide using 64 bit data. */
12932 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12933 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12934
12935 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12936 divisor ? (uint32_t) (dividend / divisor) : 0);
12937 }
12938
12939 /* 64 bit unsigned divide. */
12940 static void
12941 udiv64 (sim_cpu *cpu)
12942 {
12943 unsigned rm = INSTR (20, 16);
12944 unsigned rn = INSTR (9, 5);
12945 unsigned rd = INSTR (4, 0);
12946
12947 /* TODO : check that this rounds towards zero as required. */
12948 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12949
12950 aarch64_set_reg_u64
12951 (cpu, rd, NO_SP,
12952 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12953 }
12954
12955 static void
12956 dexDataProc2Source (sim_cpu *cpu)
12957 {
12958 /* assert instr[30] == 0
12959 instr[28,21] == 11010110
12960 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12961 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12962 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12963 001000 ==> LSLV, 001001 ==> LSRV
12964 001010 ==> ASRV, 001011 ==> RORV
12965 ow ==> UNALLOC. */
12966
12967 uint32_t dispatch;
12968 uint32_t S = INSTR (29, 29);
12969 uint32_t opcode = INSTR (15, 10);
12970
12971 if (S == 1)
12972 HALT_UNALLOC;
12973
12974 if (opcode & 0x34)
12975 HALT_UNALLOC;
12976
12977 dispatch = ( (INSTR (31, 31) << 3)
12978 | (uimm (opcode, 3, 3) << 2)
12979 | uimm (opcode, 1, 0));
12980 switch (dispatch)
12981 {
12982 case 2: udiv32 (cpu); return;
12983 case 3: sdiv32 (cpu); return;
12984 case 4: lslv32 (cpu); return;
12985 case 5: lsrv32 (cpu); return;
12986 case 6: asrv32 (cpu); return;
12987 case 7: rorv32 (cpu); return;
12988 case 10: udiv64 (cpu); return;
12989 case 11: sdiv64 (cpu); return;
12990 case 12: lslv64 (cpu); return;
12991 case 13: lsrv64 (cpu); return;
12992 case 14: asrv64 (cpu); return;
12993 case 15: rorv64 (cpu); return;
12994 default: HALT_UNALLOC;
12995 }
12996 }
12997
12998
12999 /* Multiply. */
13000
13001 /* 32 bit multiply and add. */
13002 static void
13003 madd32 (sim_cpu *cpu)
13004 {
13005 unsigned rm = INSTR (20, 16);
13006 unsigned ra = INSTR (14, 10);
13007 unsigned rn = INSTR (9, 5);
13008 unsigned rd = INSTR (4, 0);
13009
13010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13011 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13012 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13013 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
13014 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13015 }
13016
13017 /* 64 bit multiply and add. */
13018 static void
13019 madd64 (sim_cpu *cpu)
13020 {
13021 unsigned rm = INSTR (20, 16);
13022 unsigned ra = INSTR (14, 10);
13023 unsigned rn = INSTR (9, 5);
13024 unsigned rd = INSTR (4, 0);
13025
13026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13027 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13028 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13029 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
13030 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13031 }
13032
13033 /* 32 bit multiply and sub. */
13034 static void
13035 msub32 (sim_cpu *cpu)
13036 {
13037 unsigned rm = INSTR (20, 16);
13038 unsigned ra = INSTR (14, 10);
13039 unsigned rn = INSTR (9, 5);
13040 unsigned rd = INSTR (4, 0);
13041
13042 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13043 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13044 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13045 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13046 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13047 }
13048
13049 /* 64 bit multiply and sub. */
13050 static void
13051 msub64 (sim_cpu *cpu)
13052 {
13053 unsigned rm = INSTR (20, 16);
13054 unsigned ra = INSTR (14, 10);
13055 unsigned rn = INSTR (9, 5);
13056 unsigned rd = INSTR (4, 0);
13057
13058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13059 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13060 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13061 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13062 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13063 }
13064
13065 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13066 static void
13067 smaddl (sim_cpu *cpu)
13068 {
13069 unsigned rm = INSTR (20, 16);
13070 unsigned ra = INSTR (14, 10);
13071 unsigned rn = INSTR (9, 5);
13072 unsigned rd = INSTR (4, 0);
13073
13074 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13075 obtain a 64 bit product. */
13076 aarch64_set_reg_s64
13077 (cpu, rd, NO_SP,
13078 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13079 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13080 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13081 }
13082
13083 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13084 static void
13085 smsubl (sim_cpu *cpu)
13086 {
13087 unsigned rm = INSTR (20, 16);
13088 unsigned ra = INSTR (14, 10);
13089 unsigned rn = INSTR (9, 5);
13090 unsigned rd = INSTR (4, 0);
13091
13092 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13093 obtain a 64 bit product. */
13094 aarch64_set_reg_s64
13095 (cpu, rd, NO_SP,
13096 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13097 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13098 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13099 }
13100
13101 /* Integer Multiply/Divide. */
13102
13103 /* First some macros and a helper function. */
13104 /* Macros to test or access elements of 64 bit words. */
13105
13106 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13107 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13108 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13109 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13110 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13111 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13112
13113 /* Offset of sign bit in 64 bit signed integer.  */
13114 #define SIGN_SHIFT_U64 63
13115 /* The sign bit itself -- also identifies the minimum negative int value. */
13116 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
13117 /* Return true if a 64 bit signed int presented as an unsigned int is the
13118 most negative value. */
13119 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13120 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13121 int has its sign bit set.  */
13122 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13123 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13124 an unsigned int has its sign bit set or not. */
13125 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13126 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13127 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13128
13129 /* Multiply two 64 bit ints and return
13130 the hi 64 bits of the 128 bit product.  */
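/* The computation below is the schoolbook decomposition
     (2^32*a + b) * (2^32*c + d) = 2^64*a*c + 2^32*(a*d + b*c) + b*d
   with the middle partial products accumulated a word at a time so
   that any carry out of bit 63 can be collected explicitly.  */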
13131
13132 static uint64_t
13133 mul64hi (uint64_t value1, uint64_t value2)
13134 {
13135 uint64_t resultmid1;
13136 uint64_t result;
13137 uint64_t value1_lo = lowWordToU64 (value1);
13138 uint64_t value1_hi = highWordToU64 (value1);
13139 uint64_t value2_lo = lowWordToU64 (value2);
13140 uint64_t value2_hi = highWordToU64 (value2);
13141
13142 /* Cross-multiply and collect results. */
13143 uint64_t xproductlo = value1_lo * value2_lo;
13144 uint64_t xproductmid1 = value1_lo * value2_hi;
13145 uint64_t xproductmid2 = value1_hi * value2_lo;
13146 uint64_t xproducthi = value1_hi * value2_hi;
13147 uint64_t carry = 0;
13148 /* Start accumulating 64 bit results. */
13149 /* Drop bottom half of lowest cross-product. */
13150 uint64_t resultmid = xproductlo >> 32;
13151 /* Add in middle products. */
13152 resultmid = resultmid + xproductmid1;
13153
13154 /* Check for overflow. */
13155 if (resultmid < xproductmid1)
13156 /* Carry over 1 into top cross-product. */
13157 carry++;
13158
13159 resultmid1 = resultmid + xproductmid2;
13160
13161 /* Check for overflow. */
13162 if (resultmid1 < xproductmid2)
13163 /* Carry over 1 into top cross-product. */
13164 carry++;
13165
13166 /* Drop lowest 32 bits of middle cross-product. */
13167 result = resultmid1 >> 32;
13168 /* Move carry bit to just above middle cross-product highest bit. */
13169 carry = carry << 32;
13170
13171 /* Add in the top cross-product and any carry.  */
13172 result += xproducthi + carry;
13173
13174 return result;
13175 }
13176
13177 /* Signed multiply high, source, source2 :
13178 64 bit, dest <-- high 64-bit of result. */
13179 static void
13180 smulh (sim_cpu *cpu)
13181 {
13182 uint64_t uresult;
13183 int64_t result;
13184 unsigned rm = INSTR (20, 16);
13185 unsigned rn = INSTR (9, 5);
13186 unsigned rd = INSTR (4, 0);
13187 GReg ra = INSTR (14, 10);
13188 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13189 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13190 uint64_t uvalue1;
13191 uint64_t uvalue2;
13192 int negate = 0;
13193
13194 if (ra != R31)
13195 HALT_UNALLOC;
13196
13197 /* Convert to unsigned and use the unsigned mul64hi routine,
13198 then fix the sign up afterwards.  */
13199 if (value1 < 0)
13200 {
13201 negate = !negate;
13202 uvalue1 = -value1;
13203 }
13204 else
13205 {
13206 uvalue1 = value1;
13207 }
13208
13209 if (value2 < 0)
13210 {
13211 negate = !negate;
13212 uvalue2 = -value2;
13213 }
13214 else
13215 {
13216 uvalue2 = value2;
13217 }
13218
13219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13220
13221 uresult = mul64hi (uvalue1, uvalue2);
13222 result = uresult;
13223
13224 if (negate)
13225 {
13226 /* Multiply 128-bit result by -1, which means highpart gets inverted,
13227 and has carry in added only if low part is 0. */
13228 result = ~result;
13229 if ((uvalue1 * uvalue2) == 0)
13230 result += 1;
13231 }
13232
13233 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13234 }
13235
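/* A quick sanity example for the fix-up above: value1 = -1,
   value2 = 2 gives mul64hi (1, 2) == 0 with a non-zero low part, so
   negation yields ~0 == -1, the correct high half of the 128 bit
   product -2.  */
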
13236 /* Unsigned multiply add long -- source, source2 :
13237 32 bit, source3 : 64 bit. */
13238 static void
13239 umaddl (sim_cpu *cpu)
13240 {
13241 unsigned rm = INSTR (20, 16);
13242 unsigned ra = INSTR (14, 10);
13243 unsigned rn = INSTR (9, 5);
13244 unsigned rd = INSTR (4, 0);
13245
13246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13247 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13248 obtain a 64 bit product. */
13249 aarch64_set_reg_u64
13250 (cpu, rd, NO_SP,
13251 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13252 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13253 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13254 }
13255
13256 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13257 static void
13258 umsubl (sim_cpu *cpu)
13259 {
13260 unsigned rm = INSTR (20, 16);
13261 unsigned ra = INSTR (14, 10);
13262 unsigned rn = INSTR (9, 5);
13263 unsigned rd = INSTR (4, 0);
13264
13265 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13266 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13267 obtain a 64 bit product. */
13268 aarch64_set_reg_u64
13269 (cpu, rd, NO_SP,
13270 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13271 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13272 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13273 }
13274
13275 /* Unsigned multiply high, source, source2 :
13276 64 bit, dest <-- high 64-bit of result. */
13277 static void
13278 umulh (sim_cpu *cpu)
13279 {
13280 unsigned rm = INSTR (20, 16);
13281 unsigned rn = INSTR (9, 5);
13282 unsigned rd = INSTR (4, 0);
13283 GReg ra = INSTR (14, 10);
13284
13285 if (ra != R31)
13286 HALT_UNALLOC;
13287
13288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13289 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13290 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13291 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13292 }
13293
13294 static void
13295 dexDataProc3Source (sim_cpu *cpu)
13296 {
13297 /* assert instr[28,24] == 11011. */
13298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13299 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13300 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13301 instr[15] = o0 : 0/1 ==> ok
13302 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13303 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13304 0100 ==> SMULH, (64 bit only)
13305 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13306 1100 ==> UMULH (64 bit only)
13307 ow ==> UNALLOC. */
13308
13309 uint32_t dispatch;
13310 uint32_t size = INSTR (31, 31);
13311 uint32_t op54 = INSTR (30, 29);
13312 uint32_t op31 = INSTR (23, 21);
13313 uint32_t o0 = INSTR (15, 15);
13314
13315 if (op54 != 0)
13316 HALT_UNALLOC;
13317
13318 if (size == 0)
13319 {
13320 if (op31 != 0)
13321 HALT_UNALLOC;
13322
13323 if (o0 == 0)
13324 madd32 (cpu);
13325 else
13326 msub32 (cpu);
13327 return;
13328 }
13329
13330 dispatch = (op31 << 1) | o0;
13331
13332 switch (dispatch)
13333 {
13334 case 0: madd64 (cpu); return;
13335 case 1: msub64 (cpu); return;
13336 case 2: smaddl (cpu); return;
13337 case 3: smsubl (cpu); return;
13338 case 4: smulh (cpu); return;
13339 case 10: umaddl (cpu); return;
13340 case 11: umsubl (cpu); return;
13341 case 12: umulh (cpu); return;
13342 default: HALT_UNALLOC;
13343 }
13344 }
13345
13346 static void
13347 dexDPReg (sim_cpu *cpu)
13348 {
13349 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13350 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13351 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13352 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13353
13354 switch (group2)
13355 {
13356 case DPREG_LOG_000:
13357 case DPREG_LOG_001:
13358 dexLogicalShiftedRegister (cpu); return;
13359
13360 case DPREG_ADDSHF_010:
13361 dexAddSubtractShiftedRegister (cpu); return;
13362
13363 case DPREG_ADDEXT_011:
13364 dexAddSubtractExtendedRegister (cpu); return;
13365
13366 case DPREG_ADDCOND_100:
13367 {
13368 /* This set bundles a variety of different operations. */
13369 /* Check for :  */
13370 /* 1) add/sub w carry. */
13371 uint32_t mask1 = 0x1FE00000U;
13372 uint32_t val1 = 0x1A000000U;
13373 /* 2) cond compare register/immediate. */
13374 uint32_t mask2 = 0x1FE00000U;
13375 uint32_t val2 = 0x1A400000U;
13376 /* 3) cond select. */
13377 uint32_t mask3 = 0x1FE00000U;
13378 uint32_t val3 = 0x1A800000U;
13379 /* 4) data proc 1/2 source. */
13380 uint32_t mask4 = 0x1FE00000U;
13381 uint32_t val4 = 0x1AC00000U;
13382
13383 if ((aarch64_get_instr (cpu) & mask1) == val1)
13384 dexAddSubtractWithCarry (cpu);
13385
13386 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13387 CondCompare (cpu);
13388
13389 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13390 dexCondSelect (cpu);
13391
13392 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13393 {
13394 /* Bit 30 is clear for data proc 2 source
13395 and set for data proc 1 source. */
13396 if (aarch64_get_instr (cpu) & (1U << 30))
13397 dexDataProc1Source (cpu);
13398 else
13399 dexDataProc2Source (cpu);
13400 }
13401
13402 else
13403 /* Should not reach here. */
13404 HALT_NYI;
13405
13406 return;
13407 }
13408
13409 case DPREG_3SRC_110:
13410 dexDataProc3Source (cpu); return;
13411
13412 case DPREG_UNALLOC_101:
13413 HALT_UNALLOC;
13414
13415 case DPREG_3SRC_111:
13416 dexDataProc3Source (cpu); return;
13417
13418 default:
13419 /* Should never reach here. */
13420 HALT_NYI;
13421 }
13422 }
13423
13424 /* Unconditional Branch immediate.
13425 Offset is a PC-relative byte offset in the range +/- 128MiB.
13426 The offset arrives here already scaled, i.e. the decode
13427 routine converts the instruction's word offset to a byte offset.  */
13428
13429 /* Unconditional branch. */
13430 static void
13431 buc (sim_cpu *cpu, int32_t offset)
13432 {
13433 aarch64_set_next_PC_by_offset (cpu, offset);
13434 }
13435
13436 static unsigned stack_depth = 0;
13437
13438 /* Unconditional branch and link -- writes return PC to LR. */
13439 static void
13440 bl (sim_cpu *cpu, int32_t offset)
13441 {
13442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13443 aarch64_save_LR (cpu);
13444 aarch64_set_next_PC_by_offset (cpu, offset);
13445
13446 if (TRACE_BRANCH_P (cpu))
13447 {
13448 ++ stack_depth;
13449 TRACE_BRANCH (cpu,
13450 " %*scall %" PRIx64 " [%s]"
13451 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13452 stack_depth, " ", aarch64_get_next_PC (cpu),
13453 aarch64_get_func (CPU_STATE (cpu),
13454 aarch64_get_next_PC (cpu)),
13455 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13456 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13457 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13458 );
13459 }
13460 }
13461
13462 /* Unconditional Branch register.
13463 Branch/return address is in source register. */
13464
13465 /* Unconditional branch. */
13466 static void
13467 br (sim_cpu *cpu)
13468 {
13469 unsigned rn = INSTR (9, 5);
13470 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13471 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13472 }
13473
13474 /* Unconditional branch and link -- writes return PC to LR. */
13475 static void
13476 blr (sim_cpu *cpu)
13477 {
13478 unsigned rn = INSTR (9, 5);
13479
13480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13481 /* The pseudo code in the spec says we update LR before fetching
13482 the value from rn.  */
13483 aarch64_save_LR (cpu);
13484 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13485
13486 if (TRACE_BRANCH_P (cpu))
13487 {
13488 ++ stack_depth;
13489 TRACE_BRANCH (cpu,
13490 " %*scall %" PRIx64 " [%s]"
13491 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13492 stack_depth, " ", aarch64_get_next_PC (cpu),
13493 aarch64_get_func (CPU_STATE (cpu),
13494 aarch64_get_next_PC (cpu)),
13495 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13496 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13497 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13498 );
13499 }
13500 }
13501
13502 /* Return -- the assembler will default the source to LR.  This is
13503 functionally equivalent to br but, presumably, unlike br it
13504 side-effects the branch predictor.  */
13505 static void
13506 ret (sim_cpu *cpu)
13507 {
13508 unsigned rn = INSTR (9, 5);
13509 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13510
13511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13512 if (TRACE_BRANCH_P (cpu))
13513 {
13514 TRACE_BRANCH (cpu,
13515 " %*sreturn [result: %" PRIx64 "]",
13516 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13517 -- stack_depth;
13518 }
13519 }
13520
13521 /* NOP -- we implement this and call it from the decode in case we
13522 want to intercept it later. */
13523
13524 static void
13525 nop (sim_cpu *cpu)
13526 {
13527 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13528 }
13529
13530 /* Data synchronization barrier. */
13531
13532 static void
13533 dsb (sim_cpu *cpu)
13534 {
13535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13536 }
13537
13538 /* Data memory barrier. */
13539
13540 static void
13541 dmb (sim_cpu *cpu)
13542 {
13543 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13544 }
13545
13546 /* Instruction synchronization barrier. */
13547
13548 static void
13549 isb (sim_cpu *cpu)
13550 {
13551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13552 }
13553
13554 static void
13555 dexBranchImmediate (sim_cpu *cpu)
13556 {
13557 /* assert instr[30,26] == 00101
13558 instr[31] ==> 0 == B, 1 == BL
13559 instr[25,0] == imm26 branch offset counted in words. */
13560
13561 uint32_t top = INSTR (31, 31);
13562 /* We have a 26 bit signed word offset which we need to pass to the
13563 execute routine as a signed byte offset. */
13564 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13565
13566 if (top)
13567 bl (cpu, offset);
13568 else
13569 buc (cpu, offset);
13570 }
13571
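/* For example B .+8 encodes imm26 = 2: sign extension plus the << 2
   above turn that word offset into a byte offset of 8, while an
   imm26 of all ones becomes -4, i.e. a branch to the previous
   instruction.  */
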
13572 /* Control Flow. */
13573
13574 /* Conditional branch.
13575
13576 Offset is a PC-relative byte offset in the range +/- 1MiB.  Pos is
13577 a bit position in the range 0 .. 63.
13578
13579 cc is a CondCode enum value as pulled out of the decode.
13580
13581 N.B. any offset register (source) can only be Xn or Wn.  */
13582
13583 static void
13584 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13585 {
13586 /* The test returns TRUE if CC is met. */
13587 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13588 if (testConditionCode (cpu, cc))
13589 aarch64_set_next_PC_by_offset (cpu, offset);
13590 }
13591
13592 /* 32 bit branch on register non-zero. */
13593 static void
13594 cbnz32 (sim_cpu *cpu, int32_t offset)
13595 {
13596 unsigned rt = INSTR (4, 0);
13597
13598 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13599 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13600 aarch64_set_next_PC_by_offset (cpu, offset);
13601 }
13602
13603 /* 64 bit branch on register non-zero.  */
13604 static void
13605 cbnz (sim_cpu *cpu, int32_t offset)
13606 {
13607 unsigned rt = INSTR (4, 0);
13608
13609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13610 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13611 aarch64_set_next_PC_by_offset (cpu, offset);
13612 }
13613
13614 /* 32 bit branch on register zero.  */
13615 static void
13616 cbz32 (sim_cpu *cpu, int32_t offset)
13617 {
13618 unsigned rt = INSTR (4, 0);
13619
13620 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13621 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13622 aarch64_set_next_PC_by_offset (cpu, offset);
13623 }
13624
13625 /* 64 bit branch on register zero. */
13626 static void
13627 cbz (sim_cpu *cpu, int32_t offset)
13628 {
13629 unsigned rt = INSTR (4, 0);
13630
13631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13632 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13633 aarch64_set_next_PC_by_offset (cpu, offset);
13634 }
13635
13636 /* Branch on register bit test non-zero -- one size fits all. */
13637 static void
13638 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13639 {
13640 unsigned rt = INSTR (4, 0);
13641
13642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13643 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13644 aarch64_set_next_PC_by_offset (cpu, offset);
13645 }
13646
13647 /* Branch on register bit test zero -- one size fits all. */
13648 static void
13649 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13650 {
13651 unsigned rt = INSTR (4, 0);
13652
13653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13654 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13655 aarch64_set_next_PC_by_offset (cpu, offset);
13656 }
13657
13658 static void
13659 dexCompareBranchImmediate (sim_cpu *cpu)
13660 {
13661 /* instr[30,25] = 01 1010
13662 instr[31] = size : 0 ==> 32, 1 ==> 64
13663 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13664 instr[23,5] = simm19 branch offset counted in words
13665 instr[4,0] = rt */
13666
13667 uint32_t size = INSTR (31, 31);
13668 uint32_t op = INSTR (24, 24);
13669 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13670
13671 if (size == 0)
13672 {
13673 if (op == 0)
13674 cbz32 (cpu, offset);
13675 else
13676 cbnz32 (cpu, offset);
13677 }
13678 else
13679 {
13680 if (op == 0)
13681 cbz (cpu, offset);
13682 else
13683 cbnz (cpu, offset);
13684 }
13685 }
13686
13687 static void
13688 dexTestBranchImmediate (sim_cpu *cpu)
13689 {
13690 /* instr[31] = b5 : bit 5 of test bit idx
13691 instr[30,25] = 01 1011
13692 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13693 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13694 instr[18,5] = simm14 : signed offset counted in words
13695 instr[4,0] = uimm5 */
13696
13697 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13698 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13699
13700 NYI_assert (30, 25, 0x1b);
13701
13702 if (INSTR (24, 24) == 0)
13703 tbz (cpu, pos, offset);
13704 else
13705 tbnz (cpu, pos, offset);
13706 }
13707
13708 static void
13709 dexCondBranchImmediate (sim_cpu *cpu)
13710 {
13711 /* instr[31,25] = 010 1010
13712 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13713 instr[23,5] = simm19 : signed offset counted in words
13714 instr[4] = op0
13715 instr[3,0] = cond */
13716
13717 int32_t offset;
13718 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13719
13720 NYI_assert (31, 25, 0x2a);
13721
13722 if (op != 0)
13723 HALT_UNALLOC;
13724
13725 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13726
13727 bcc (cpu, offset, INSTR (3, 0));
13728 }
13729
13730 static void
13731 dexBranchRegister (sim_cpu *cpu)
13732 {
13733 /* instr[31,25] = 110 1011
13734 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13735 instr[20,16] = op2 : must be 11111
13736 instr[15,10] = op3 : must be 000000
13737 instr[4,0] = op4 : must be 00000.  */
13738
13739 uint32_t op = INSTR (24, 21);
13740 uint32_t op2 = INSTR (20, 16);
13741 uint32_t op3 = INSTR (15, 10);
13742 uint32_t op4 = INSTR (4, 0);
13743
13744 NYI_assert (31, 25, 0x6b);
13745
13746 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13747 HALT_UNALLOC;
13748
13749 if (op == 0)
13750 br (cpu);
13751
13752 else if (op == 1)
13753 blr (cpu);
13754
13755 else if (op == 2)
13756 ret (cpu);
13757
13758 else
13759 {
13760 /* ERET and DRPS accept 0b11111 for rn = instr [4,0];
13761 anything else is unallocated.  */
13762 uint32_t rn = INSTR (4, 0);
13763
13764 if (rn != 0x1f)
13765 HALT_UNALLOC;
13766
13767 if (op == 4 || op == 5)
13768 HALT_NYI;
13769
13770 HALT_UNALLOC;
13771 }
13772 }
13773
13774 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13775 but this may not be available. So instead we define the values we need
13776 here. */
13777 #define AngelSVC_Reason_Open 0x01
13778 #define AngelSVC_Reason_Close 0x02
13779 #define AngelSVC_Reason_Write 0x05
13780 #define AngelSVC_Reason_Read 0x06
13781 #define AngelSVC_Reason_IsTTY 0x09
13782 #define AngelSVC_Reason_Seek 0x0A
13783 #define AngelSVC_Reason_FLen 0x0C
13784 #define AngelSVC_Reason_Remove 0x0E
13785 #define AngelSVC_Reason_Rename 0x0F
13786 #define AngelSVC_Reason_Clock 0x10
13787 #define AngelSVC_Reason_Time 0x11
13788 #define AngelSVC_Reason_System 0x12
13789 #define AngelSVC_Reason_Errno 0x13
13790 #define AngelSVC_Reason_GetCmdLine 0x15
13791 #define AngelSVC_Reason_HeapInfo 0x16
13792 #define AngelSVC_Reason_ReportException 0x18
13793 #define AngelSVC_Reason_Elapsed 0x30
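
/* In the Angel (semihosting) convention handled below, W0 holds the
   reason code and X1 points at a parameter block in target memory.
   A Write request, for example, supplies three 64 bit words -- file
   handle, buffer address and byte count -- which is exactly the
   layout handle_halt reads back with aarch64_get_mem_u64.  */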
13794
13795
13796 static void
13797 handle_halt (sim_cpu *cpu, uint32_t val)
13798 {
13799 uint64_t result = 0;
13800
13801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13802 if (val != 0xf000)
13803 {
13804 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13805 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13806 sim_stopped, SIM_SIGTRAP);
13807 }
13808
13809 /* We have encountered an Angel SVC call. See if we can process it. */
13810 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13811 {
13812 case AngelSVC_Reason_HeapInfo:
13813 {
13814 /* Get the values. */
13815 uint64_t stack_top = aarch64_get_stack_start (cpu);
13816 uint64_t heap_base = aarch64_get_heap_start (cpu);
13817
13818 /* Get the pointer */
13819 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13820 ptr = aarch64_get_mem_u64 (cpu, ptr);
13821
13822 /* Fill in the memory block. */
13823 /* Start addr of heap. */
13824 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13825 /* End addr of heap. */
13826 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13827 /* Lowest stack addr. */
13828 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13829 /* Initial stack addr. */
13830 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13831
13832 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13833 }
13834 break;
13835
13836 case AngelSVC_Reason_Open:
13837 {
13838 /* Get the pointer */
13839 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
13840 /* FIXME: For now we just assume that we will only be asked
13841 to open the standard file descriptors. */
13842 static int fd = 0;
13843 result = fd ++;
13844
13845 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13846 }
13847 break;
13848
13849 case AngelSVC_Reason_Close:
13850 {
13851 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13852 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13853 result = 0;
13854 }
13855 break;
13856
13857 case AngelSVC_Reason_Errno:
13858 result = 0;
13859 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13860 break;
13861
13862 case AngelSVC_Reason_Clock:
13863 result =
13864 #ifdef CLOCKS_PER_SEC
13865 (CLOCKS_PER_SEC >= 100)
13866 ? (clock () / (CLOCKS_PER_SEC / 100))
13867 : ((clock () * 100) / CLOCKS_PER_SEC)
13868 #else
13869 /* Presume unix... clock() returns microseconds. */
13870 (clock () / 10000)
13871 #endif
13872 ;
13873 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13874 break;
13875
13876 case AngelSVC_Reason_GetCmdLine:
13877 {
13878 /* Get the pointer */
13879 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13880 ptr = aarch64_get_mem_u64 (cpu, ptr);
13881
13882 /* FIXME: No command line for now. */
13883 aarch64_set_mem_u64 (cpu, ptr, 0);
13884 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13885 }
13886 break;
13887
13888 case AngelSVC_Reason_IsTTY:
13889 result = 1;
13890 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13891 break;
13892
13893 case AngelSVC_Reason_Write:
13894 {
13895 /* Get the pointer */
13896 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13897 /* Get the write control block. */
13898 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13899 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13900 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13901
13902 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13903 PRIx64 " on descriptor %" PRIx64,
13904 len, buf, fd);
13905
13906 if (len > 1280)
13907 {
13908 TRACE_SYSCALL (cpu,
13909 " AngelSVC: Write: Suspiciously long write: %ld",
13910 (long) len);
13911 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13912 sim_stopped, SIM_SIGBUS);
13913 }
13914 else if (fd == 1)
13915 {
13916 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13917 }
13918 else if (fd == 2)
13919 {
13920 TRACE (cpu, 0, "\n");
13921 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13922 (int) len, aarch64_get_mem_ptr (cpu, buf));
13923 TRACE (cpu, 0, "\n");
13924 }
13925 else
13926 {
13927 TRACE_SYSCALL (cpu,
13928 " AngelSVC: Write: Unexpected file handle: %d",
13929 (int) fd);
13930 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13931 sim_stopped, SIM_SIGABRT);
13932 }
13933 }
13934 break;
13935
13936 case AngelSVC_Reason_ReportException:
13937 {
13938 /* Get the pointer */
13939 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13940 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13941 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13942 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13943
13944 TRACE_SYSCALL (cpu,
13945 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13946 type, state);
13947
13948 if (type == 0x20026)
13949 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13950 sim_exited, state);
13951 else
13952 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13953 sim_stopped, SIM_SIGINT);
13954 }
13955 break;
13956
13957 case AngelSVC_Reason_Read:
13958 case AngelSVC_Reason_FLen:
13959 case AngelSVC_Reason_Seek:
13960 case AngelSVC_Reason_Remove:
13961 case AngelSVC_Reason_Time:
13962 case AngelSVC_Reason_System:
13963 case AngelSVC_Reason_Rename:
13964 case AngelSVC_Reason_Elapsed:
13965 default:
13966 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13967 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13968 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13969 sim_stopped, SIM_SIGTRAP);
13970 }
13971
13972 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13973 }
13974
13975 static void
13976 dexExcpnGen (sim_cpu *cpu)
13977 {
13978 /* instr[31:24] = 11010100
13979 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13980 010 ==> HLT, 101 ==> DBG GEN EXCPN
13981 instr[20,5] = imm16
13982 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13983 instr[1,0] = LL : discriminates opc */
13984
13985 uint32_t opc = INSTR (23, 21);
13986 uint32_t imm16 = INSTR (20, 5);
13987 uint32_t opc2 = INSTR (4, 2);
13988 uint32_t LL;
13989
13990 NYI_assert (31, 24, 0xd4);
13991
13992 if (opc2 != 0)
13993 HALT_UNALLOC;
13994
13995 LL = INSTR (1, 0);
13996
13997 /* We only implement HLT and BRK for now. */
13998 if (opc == 1 && LL == 0)
13999 {
14000 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
14001 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
14002 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
14003 }
14004
14005 if (opc == 2 && LL == 0)
14006 handle_halt (cpu, imm16);
14007
14008 else if (opc == 0 || opc == 5)
14009 HALT_NYI;
14010
14011 else
14012 HALT_UNALLOC;
14013 }
14014
14015 /* Stub for accessing system registers. */
14016
14017 static uint64_t
14018 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14019 unsigned crm, unsigned op2)
14020 {
14021 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
14022 /* DCZID_EL0 - the Data Cache Zero ID register.
14023 We do not support DC ZVA at the moment, so
14024 we return a value with the disable bit set.
14025 We implement support for the DCZID register since
14026 it is used by the C library's memset function. */
14027 return ((uint64_t) 1) << 4;
14028
14029 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
14030 /* Cache Type Register. */
14031 return 0x80008000UL;
14032
14033 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
14034 /* TPIDR_EL0 - thread pointer id. */
14035 return aarch64_get_thread_id (cpu);
14036
14037 if (op1 == 3 && crm == 4 && op2 == 0)
14038 return aarch64_get_FPCR (cpu);
14039
14040 if (op1 == 3 && crm == 4 && op2 == 1)
14041 return aarch64_get_FPSR (cpu);
14042
14043 else if (op1 == 3 && crm == 2 && op2 == 0)
14044 return aarch64_get_CPSR (cpu);
14045
14046 HALT_NYI;
14047 }
14048
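/* As an illustrative mapping (standard architectural operand values,
   not anything new to this file): the MRS for DCZID_EL0 used by the
   C library's memset carries op0 = 3, op1 = 3, CRn = 0, CRm = 0,
   op2 = 7, which selects the first case accepted above.  */
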
14049 static void
14050 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14051 unsigned crm, unsigned op2, uint64_t val)
14052 {
14053 if (op1 == 3 && crm == 4 && op2 == 0)
14054 aarch64_set_FPCR (cpu, val);
14055
14056 else if (op1 == 3 && crm == 4 && op2 == 1)
14057 aarch64_set_FPSR (cpu, val);
14058
14059 else if (op1 == 3 && crm == 2 && op2 == 0)
14060 aarch64_set_CPSR (cpu, val);
14061
14062 else
14063 HALT_NYI;
14064 }
14065
14066 static void
14067 do_mrs (sim_cpu *cpu)
14068 {
14069 /* instr[31:20] = 1101 0101 0011
14070 instr[19] = op0
14071 instr[18,16] = op1
14072 instr[15,12] = CRn
14073 instr[11,8] = CRm
14074 instr[7,5] = op2
14075 instr[4,0] = Rt */
14076 unsigned sys_op0 = INSTR (19, 19) + 2;
14077 unsigned sys_op1 = INSTR (18, 16);
14078 unsigned sys_crn = INSTR (15, 12);
14079 unsigned sys_crm = INSTR (11, 8);
14080 unsigned sys_op2 = INSTR (7, 5);
14081 unsigned rt = INSTR (4, 0);
14082
14083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14084 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14085 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14086 }
14087
14088 static void
14089 do_MSR_immediate (sim_cpu *cpu)
14090 {
14091 /* instr[31:19] = 1101 0101 0000 0
14092 instr[18,16] = op1
14093 instr[15,12] = 0100
14094 instr[11,8] = CRm
14095 instr[7,5] = op2
14096 instr[4,0] = 1 1111 */
14097
14098 unsigned op1 = INSTR (18, 16);
14099 /*unsigned crm = INSTR (11, 8);*/
14100 unsigned op2 = INSTR (7, 5);
14101
14102 NYI_assert (31, 19, 0x1AA0);
14103 NYI_assert (15, 12, 0x4);
14104 NYI_assert (4, 0, 0x1F);
14105
14106 if (op1 == 0)
14107 {
14108 if (op2 == 5)
14109 HALT_NYI; /* set SPSel. */
14110 else
14111 HALT_UNALLOC;
14112 }
14113 else if (op1 == 3)
14114 {
14115 if (op2 == 6)
14116 HALT_NYI; /* set DAIFset. */
14117 else if (op2 == 7)
14118 HALT_NYI; /* set DAIFclr. */
14119 else
14120 HALT_UNALLOC;
14121 }
14122 else
14123 HALT_UNALLOC;
14124 }
14125
14126 static void
14127 do_MSR_reg (sim_cpu *cpu)
14128 {
14129 /* instr[31:20] = 1101 0101 0001
14130 instr[19] = op0
14131 instr[18,16] = op1
14132 instr[15,12] = CRn
14133 instr[11,8] = CRm
14134 instr[7,5] = op2
14135 instr[4,0] = Rt */
14136
14137 unsigned sys_op0 = INSTR (19, 19) + 2;
14138 unsigned sys_op1 = INSTR (18, 16);
14139 unsigned sys_crn = INSTR (15, 12);
14140 unsigned sys_crm = INSTR (11, 8);
14141 unsigned sys_op2 = INSTR (7, 5);
14142 unsigned rt = INSTR (4, 0);
14143
14144 NYI_assert (31, 20, 0xD51);
14145
14146 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14147 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14148 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14149 }
14150
14151 static void
14152 do_SYS (sim_cpu *cpu)
14153 {
14154 /* instr[31,19] = 1101 0101 0000 1
14155 instr[18,16] = op1
14156 instr[15,12] = CRn
14157 instr[11,8] = CRm
14158 instr[7,5] = op2
14159 instr[4,0] = Rt */
14160 NYI_assert (31, 19, 0x1AA1);
14161
14162 /* FIXME: For now we just silently accept system ops. */
14163 }
14164
14165 static void
14166 dexSystem (sim_cpu *cpu)
14167 {
14168 /* instr[31:22] = 1101 01010 0
14169 instr[21] = L
14170 instr[20,19] = op0
14171 instr[18,16] = op1
14172 instr[15,12] = CRn
14173 instr[11,8] = CRm
14174 instr[7,5] = op2
14175 instr[4,0] = uimm5 */
14176
14177 /* We are interested in HINT, DSB, DMB and ISB
14178
14179 Hint #0 encodes NOOP (this is the only hint we care about);
14180 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14181 CRm:op2 == 0000 000.  Hints with CRm != 0000 or op2 > 101 are also treated as NOPs.
14182
14183 DSB, DMB, ISB are data synchronization barrier, data memory
14184 barrier and instruction synchronization barrier, respectively, where
14185
14186 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14187 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14188 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14189 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14190 10 ==> InnerShareable, 11 ==> FullSystem
14191 types : 01 ==> Reads, 10 ==> Writes,
14192 11 ==> All, 00 ==> All (domain == FullSystem). */
14193
14194 unsigned rt = INSTR (4, 0);
14195
14196 NYI_assert (31, 22, 0x354);
14197
14198 switch (INSTR (21, 12))
14199 {
14200 case 0x032:
14201 if (rt == 0x1F)
14202 {
14203 /* NOP has CRm != 0000 OR
14204 (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
14205 uint32_t crm = INSTR (11, 8);
14206 uint32_t op2 = INSTR (7, 5);
14207
14208 if (crm != 0 || (op2 == 0 || op2 > 5))
14209 {
14210 /* Actually call nop method so we can reimplement it later. */
14211 nop (cpu);
14212 return;
14213 }
14214 }
14215 HALT_NYI;
14216
14217 case 0x033:
14218 {
14219 uint32_t op2 = INSTR (7, 5);
14220
14221 switch (op2)
14222 {
14223 case 2: HALT_NYI;
14224 case 4: dsb (cpu); return;
14225 case 5: dmb (cpu); return;
14226 case 6: isb (cpu); return;
14227 default: HALT_UNALLOC;
14228 }
14229 }
14230
14231 case 0x3B0:
14232 case 0x3B4:
14233 case 0x3BD:
14234 do_mrs (cpu);
14235 return;
14236
14237 case 0x0B7:
14238 do_SYS (cpu); /* DC is an alias of SYS. */
14239 return;
14240
14241 default:
14242 if (INSTR (21, 20) == 0x1)
14243 do_MSR_reg (cpu);
14244 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14245 do_MSR_immediate (cpu);
14246 else
14247 HALT_NYI;
14248 return;
14249 }
14250 }
14251
14252 static void
14253 dexBr (sim_cpu *cpu)
14254 {
14255 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14256 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14257 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14258 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14259
14260 switch (group2)
14261 {
14262 case BR_IMM_000:
14263 return dexBranchImmediate (cpu);
14264
14265 case BR_IMMCMP_001:
14266 /* Compare has bit 25 clear while test has it set. */
14267 if (!INSTR (25, 25))
14268 dexCompareBranchImmediate (cpu);
14269 else
14270 dexTestBranchImmediate (cpu);
14271 return;
14272
14273 case BR_IMMCOND_010:
14274 /* This is a conditional branch if bit 25 is clear otherwise
14275 unallocated. */
14276 if (!INSTR (25, 25))
14277 dexCondBranchImmediate (cpu);
14278 else
14279 HALT_UNALLOC;
14280 return;
14281
14282 case BR_UNALLOC_011:
14283 HALT_UNALLOC;
14284
14285 case BR_IMM_100:
14286 dexBranchImmediate (cpu);
14287 return;
14288
14289 case BR_IMMCMP_101:
14290 /* Compare has bit 25 clear while test has it set. */
14291 if (!INSTR (25, 25))
14292 dexCompareBranchImmediate (cpu);
14293 else
14294 dexTestBranchImmediate (cpu);
14295 return;
14296
14297 case BR_REG_110:
14298 /* Unconditional branch reg has bit 25 set. */
14299 if (INSTR (25, 25))
14300 dexBranchRegister (cpu);
14301
14302 /* This includes both Excpn Gen, System and unalloc operations.
14303 We need to decode the Excpn Gen operation BRK so we can plant
14304 debugger entry points.
14305 Excpn Gen operations have instr [24] = 0.
14306 we need to decode at least one of the System operations NOP
14307 which is an alias for HINT #0.
14308 System operations have instr [24,22] = 100. */
14309 else if (INSTR (24, 24) == 0)
14310 dexExcpnGen (cpu);
14311
14312 else if (INSTR (24, 22) == 4)
14313 dexSystem (cpu);
14314
14315 else
14316 HALT_UNALLOC;
14317
14318 return;
14319
14320 case BR_UNALLOC_111:
14321 HALT_UNALLOC;
14322
14323 default:
14324 /* Should never reach here. */
14325 HALT_NYI;
14326 }
14327 }
14328
14329 static void
14330 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14331 {
14332 /* We need to check if gdb wants to break in here.  */
14333 /* checkBreak (cpu);. */
14334
14335 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14336
14337 switch (group)
14338 {
14339 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14340 case GROUP_LDST_0100: dexLdSt (cpu); break;
14341 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14342 case GROUP_LDST_0110: dexLdSt (cpu); break;
14343 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14344 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14345 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14346 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14347 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14348 case GROUP_LDST_1100: dexLdSt (cpu); break;
14349 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14350 case GROUP_LDST_1110: dexLdSt (cpu); break;
14351 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14352
14353 case GROUP_UNALLOC_0001:
14354 case GROUP_UNALLOC_0010:
14355 case GROUP_UNALLOC_0011:
14356 HALT_UNALLOC;
14357
14358 default:
14359 /* Should never reach here. */
14360 HALT_NYI;
14361 }
14362 }
14363
14364 static bfd_boolean
14365 aarch64_step (sim_cpu *cpu)
14366 {
14367 uint64_t pc = aarch64_get_PC (cpu);
14368
14369 if (pc == TOP_LEVEL_RETURN_PC)
14370 return FALSE;
14371
14372 aarch64_set_next_PC (cpu, pc + 4);
14373
14374 /* Code is always little-endian. */
14375 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14376 & aarch64_get_instr (cpu), pc, 4);
14377 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14378
14379 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14380 aarch64_get_instr (cpu));
14381 TRACE_DISASM (cpu, pc);
14382
14383 aarch64_decode_and_execute (cpu, pc);
14384
14385 return TRUE;
14386 }
14387
14388 void
14389 aarch64_run (SIM_DESC sd)
14390 {
14391 sim_cpu *cpu = STATE_CPU (sd, 0);
14392
14393 while (aarch64_step (cpu))
14394 {
14395 aarch64_update_PC (cpu);
14396
14397 if (sim_events_tick (sd))
14398 sim_events_process (sd);
14399 }
14400
14401 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14402 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14403 }
14404
14405 void
14406 aarch64_init (sim_cpu *cpu, uint64_t pc)
14407 {
14408 uint64_t sp = aarch64_get_stack_start (cpu);
14409
14410 /* Install SP, FP and PC and set LR to -20
14411 so we can detect a top-level return. */
14412 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14413 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14414 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14415 aarch64_set_next_PC (cpu, pc);
14416 aarch64_update_PC (cpu);
14417 aarch64_init_LIT_table ();
14418 }