Fix more bugs in AArch64 simulator.
sim/aarch64/simulator.c
1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2016 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <math.h>
28 #include <time.h>
29 #include <limits.h>
30
31 #include "simulator.h"
32 #include "cpustate.h"
33 #include "memory.h"
34
35 #define NO_SP 0
36 #define SP_OK 1
37
38 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
39 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
40 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
41
42 /* Space saver macro. */
43 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
44
45 #define HALT_UNALLOC \
46 do \
47 { \
48 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
49 TRACE_INSN (cpu, \
50 "Unallocated instruction detected at sim line %d," \
51 " exe addr %" PRIx64, \
52 __LINE__, aarch64_get_PC (cpu)); \
53 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
54 sim_stopped, SIM_SIGILL); \
55 } \
56 while (0)
57
58 #define HALT_NYI \
59 do \
60 { \
61 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
62 TRACE_INSN (cpu, \
63 "Unimplemented instruction detected at sim line %d," \
64 " exe addr %" PRIx64, \
65 __LINE__, aarch64_get_PC (cpu)); \
66 if (! TRACE_ANY_P (cpu)) \
67 { \
68 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
69 trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu)); \
70 } \
71 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
72 sim_stopped, SIM_SIGABRT); \
73 } \
74 while (0)
75
76 #define NYI_assert(HI, LO, EXPECTED) \
77 do \
78 { \
79 if (INSTR ((HI), (LO)) != (EXPECTED)) \
80 HALT_NYI; \
81 } \
82 while (0)
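
/* Illustrative example, not part of the original file: a decode
   routine that has matched most of an encoding can assert the
   remaining fixed bits, e.g.

     NYI_assert (29, 24, 0x0B);    -- halt if instr[29,24] != 0b001011

   so that unexpected encodings land in HALT_NYI instead of being
   silently mis-simulated.  */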
83
84 /* Helper functions used by expand_logical_immediate. */
85
86 /* For i = 1, .., N: result<i-1> = 1; all other bits are zero. */
87 static inline uint64_t
88 ones (int N)
89 {
90 return (N == 64 ? (uint64_t)-1UL : ((1ULL << N) - 1));
91 }
92
93 /* result<0> = val<N>; all other bits are zero. */
94 static inline uint64_t
95 pickbit (uint64_t val, int N)
96 {
97 return pickbits64 (val, N, N);
98 }
99
100 static uint64_t
101 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
102 {
103 uint64_t mask;
104 uint64_t imm;
105 unsigned simd_size;
106
107 /* The immediate value is a run of S+1 bits set to 1, left rotated
108 by SIMDsize - R (in other words, right rotated by R), then replicated. */
109 if (N != 0)
110 {
111 simd_size = 64;
112 mask = 0xffffffffffffffffull;
113 }
114 else
115 {
116 switch (S)
117 {
118 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
119 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
120 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
121 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
122 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
123 default: return 0;
124 }
125 mask = (1ull << simd_size) - 1;
126 /* Top bits are IGNORED. */
127 R &= simd_size - 1;
128 }
129
130 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
131 if (S == simd_size - 1)
132 return 0;
133
134 /* S+1 consecutive bits to 1. */
135 /* NOTE: S can't be 63 due to detection above. */
136 imm = (1ull << (S + 1)) - 1;
137
138 /* Rotate to the left by simd_size - R. */
139 if (R != 0)
140 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
141
142 /* Replicate the value according to SIMD size. */
143 switch (simd_size)
144 {
145 case 2: imm = (imm << 2) | imm; /* Fall through. */
146 case 4: imm = (imm << 4) | imm; /* Fall through. */
147 case 8: imm = (imm << 8) | imm; /* Fall through. */
148 case 16: imm = (imm << 16) | imm; /* Fall through. */
149 case 32: imm = (imm << 32) | imm; /* Fall through. */
150 case 64: break;
151 default: return 0;
152 }
153
154 return imm;
155 }
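
/* Worked example, not part of the original file: the encoding
   N=0, immr=0, imms=0x3c selects a 2-bit element (S is masked down
   to 0), i.e. a single 1 bit, which is then replicated to 64 bits:

     expand_logical_immediate (0x3c, 0, 0)   -- 0x5555555555555555

   whereas imms=0x3d is rejected (after masking, S == simd_size - 1
   would give an all-ones element) and yields 0.  */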
156
157 /* Instr[22,10] encodes N, immr and imms. We want a lookup table
158 for each possible combination, i.e. 13 bits worth of entries. */
159 #define LI_TABLE_SIZE (1 << 13)
160 static uint64_t LITable[LI_TABLE_SIZE];
161
162 void
163 aarch64_init_LIT_table (void)
164 {
165 unsigned index;
166
167 for (index = 0; index < LI_TABLE_SIZE; index++)
168 {
169 uint32_t N = uimm (index, 12, 12);
170 uint32_t immr = uimm (index, 11, 6);
171 uint32_t imms = uimm (index, 5, 0);
172
173 LITable [index] = expand_logical_immediate (imms, immr, N);
174 }
175 }
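
/* Illustrative sketch, not the file's actual decode code: a handler
   for the logical immediate instructions can consult the table with
   the thirteen bits instr[22,10] and treat a zero entry as an
   unallocated encoding, along these lines:

     uint32_t index = INSTR (22, 10);
     uint64_t imm = LITable[index];

     if (imm == 0)
       HALT_UNALLOC;    -- zero is never a valid logical immediate.  */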
176
177 static void
178 dexNotify (sim_cpu *cpu)
179 {
180 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
181 2 ==> exit Java, 3 ==> start next bytecode. */
182 uint32_t type = INSTR (14, 0);
183
184 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
185
186 switch (type)
187 {
188 case 0:
189 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
190 aarch64_get_reg_u64 (cpu, R22, 0)); */
191 break;
192 case 1:
193 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
194 aarch64_get_reg_u64 (cpu, R22, 0)); */
195 break;
196 case 2:
197 /* aarch64_notifyMethodExit (); */
198 break;
199 case 3:
200 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
201 aarch64_get_reg_u64 (cpu, R22, 0)); */
202 break;
203 }
204 }
205
206 /* secondary decode within top level groups */
207
208 static void
209 dexPseudo (sim_cpu *cpu)
210 {
211 /* assert instr[28,27] = 00
212
213 We provide 2 pseudo instructions:
214
215 HALT stops execution of the simulator causing an immediate
216 return to the x86 code which entered it.
217
218 CALLOUT initiates recursive entry into x86 code. A register
219 argument holds the address of the x86 routine. Immediate
220 values in the instruction identify the number of general
221 purpose and floating point register arguments to be passed
222 and the type of any value to be returned. */
223
224 uint32_t PSEUDO_HALT = 0xE0000000U;
225 uint32_t PSEUDO_CALLOUT = 0x00018000U;
226 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
227 uint32_t PSEUDO_NOTIFY = 0x00014000U;
228 uint32_t dispatch;
229
230 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
231 {
232 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
233 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
234 sim_stopped, SIM_SIGTRAP);
235 }
236
237 dispatch = INSTR (31, 15);
238
239 /* We do not handle callouts at the moment. */
240 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
241 {
242 TRACE_EVENTS (cpu, " Callout");
243 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
244 sim_stopped, SIM_SIGABRT);
245 }
246
247 else if (dispatch == PSEUDO_NOTIFY)
248 dexNotify (cpu);
249
250 else
251 HALT_UNALLOC;
252 }
253
254 /* Load-store single register (unscaled offset)
255 These instructions employ a base register plus an unscaled signed
256 9 bit offset.
257
258 N.B. the base register (source) can be Xn or SP. All other
259 registers may not be SP. */
260
261 /* 32 bit load 32 bit unscaled signed 9 bit. */
262 static void
263 ldur32 (sim_cpu *cpu, int32_t offset)
264 {
265 unsigned rn = INSTR (9, 5);
266 unsigned rt = INSTR (4, 0);
267
268 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
269 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
270 + offset));
271 }
272
273 /* 64 bit load 64 bit unscaled signed 9 bit. */
274 static void
275 ldur64 (sim_cpu *cpu, int32_t offset)
276 {
277 unsigned rn = INSTR (9, 5);
278 unsigned rt = INSTR (4, 0);
279
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
282 + offset));
283 }
284
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
286 static void
287 ldurb32 (sim_cpu *cpu, int32_t offset)
288 {
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
291
292 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
293 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
294 + offset));
295 }
296
297 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
298 static void
299 ldursb32 (sim_cpu *cpu, int32_t offset)
300 {
301 unsigned rn = INSTR (9, 5);
302 unsigned rt = INSTR (4, 0);
303
304 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
305 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
306 + offset));
307 }
308
309 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
310 static void
311 ldursb64 (sim_cpu *cpu, int32_t offset)
312 {
313 unsigned rn = INSTR (9, 5);
314 unsigned rt = INSTR (4, 0);
315
316 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
317 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
318 + offset));
319 }
320
321 /* 32 bit load zero-extended short unscaled signed 9 bit */
322 static void
323 ldurh32 (sim_cpu *cpu, int32_t offset)
324 {
325 unsigned rn = INSTR (9, 5);
326 unsigned rd = INSTR (4, 0);
327
328 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
329 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
330 + offset));
331 }
332
333 /* 32 bit load sign-extended short unscaled signed 9 bit */
334 static void
335 ldursh32 (sim_cpu *cpu, int32_t offset)
336 {
337 unsigned rn = INSTR (9, 5);
338 unsigned rd = INSTR (4, 0);
339
340 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
341 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
342 + offset));
343 }
344
345 /* 64 bit load sign-extended short unscaled signed 9 bit */
346 static void
347 ldursh64 (sim_cpu *cpu, int32_t offset)
348 {
349 unsigned rn = INSTR (9, 5);
350 unsigned rt = INSTR (4, 0);
351
352 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
353 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
354 + offset));
355 }
356
357 /* 64 bit load sign-extended word unscaled signed 9 bit */
358 static void
359 ldursw (sim_cpu *cpu, int32_t offset)
360 {
361 unsigned rn = INSTR (9, 5);
362 unsigned rd = INSTR (4, 0);
363
364 aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
365 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
366 + offset));
367 }
368
369 /* N.B. with stores the value in source is written to the address
370 identified by source2 modified by offset. */
371
372 /* 32 bit store 32 bit unscaled signed 9 bit. */
373 static void
374 stur32 (sim_cpu *cpu, int32_t offset)
375 {
376 unsigned rn = INSTR (9, 5);
377 unsigned rd = INSTR (4, 0);
378
379 aarch64_set_mem_u32 (cpu,
380 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
381 aarch64_get_reg_u32 (cpu, rd, NO_SP));
382 }
383
384 /* 64 bit store 64 bit unscaled signed 9 bit */
385 static void
386 stur64 (sim_cpu *cpu, int32_t offset)
387 {
388 unsigned rn = INSTR (9, 5);
389 unsigned rd = INSTR (4, 0);
390
391 aarch64_set_mem_u64 (cpu,
392 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
393 aarch64_get_reg_u64 (cpu, rd, NO_SP));
394 }
395
396 /* 32 bit store byte unscaled signed 9 bit */
397 static void
398 sturb (sim_cpu *cpu, int32_t offset)
399 {
400 unsigned rn = INSTR (9, 5);
401 unsigned rd = INSTR (4, 0);
402
403 aarch64_set_mem_u8 (cpu,
404 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
405 aarch64_get_reg_u8 (cpu, rd, NO_SP));
406 }
407
408 /* 32 bit store short unscaled signed 9 bit */
409 static void
410 sturh (sim_cpu *cpu, int32_t offset)
411 {
412 unsigned rn = INSTR (9, 5);
413 unsigned rd = INSTR (4, 0);
414
415 aarch64_set_mem_u16 (cpu,
416 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
417 aarch64_get_reg_u16 (cpu, rd, NO_SP));
418 }
419
420 /* Load single register pc-relative label
421 Offset is a signed 19 bit immediate count in words
422 rt may not be SP. */
423
424 /* 32 bit pc-relative load */
425 static void
426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
427 {
428 unsigned rd = INSTR (4, 0);
429
430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
431 aarch64_get_mem_u32
432 (cpu, aarch64_get_PC (cpu) + offset * 4));
433 }
434
435 /* 64 bit pc-relative load */
436 static void
437 ldr_pcrel (sim_cpu *cpu, int32_t offset)
438 {
439 unsigned rd = INSTR (4, 0);
440
441 aarch64_set_reg_u64 (cpu, rd, NO_SP,
442 aarch64_get_mem_u64
443 (cpu, aarch64_get_PC (cpu) + offset * 4));
444 }
445
446 /* sign extended 32 bit pc-relative load */
447 static void
448 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
449 {
450 unsigned rd = INSTR (4, 0);
451
452 aarch64_set_reg_u64 (cpu, rd, NO_SP,
453 aarch64_get_mem_s32
454 (cpu, aarch64_get_PC (cpu) + offset * 4));
455 }
456
457 /* float pc-relative load */
458 static void
459 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
460 {
461 unsigned int rd = INSTR (4, 0);
462
463 aarch64_set_vec_u32 (cpu, rd, 0,
464 aarch64_get_mem_u32
465 (cpu, aarch64_get_PC (cpu) + offset * 4));
466 }
467
468 /* double pc-relative load */
469 static void
470 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
471 {
472 unsigned int st = INSTR (4, 0);
473
474 aarch64_set_vec_u64 (cpu, st, 0,
475 aarch64_get_mem_u64
476 (cpu, aarch64_get_PC (cpu) + offset * 4));
477 }
478
479 /* long double pc-relative load. */
480 static void
481 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
482 {
483 unsigned int st = INSTR (4, 0);
484 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
485 FRegister a;
486
487 aarch64_get_mem_long_double (cpu, addr, & a);
488 aarch64_set_FP_long_double (cpu, st, a);
489 }
490
491 /* This can be used to scale an offset by applying
492 the requisite shift. The second argument is either
493 16, 32, 64 or 128. */
494
495 #define SCALE(_offset, _elementSize) \
496 ((_offset) << ScaleShift ## _elementSize)
497
498 /* This can be used to optionally scale a register derived offset
499 by applying the requisite shift as indicated by the Scaling
500 argument. The second argument is either 16, 32, 64 or 128.
501 The third argument is either Scaled or Unscaled.
502 N.B. when _Scaling is Scaled the element-size shift is applied;
503 when it is Unscaled no shift is applied. */
504
505 #define OPT_SCALE(_offset, _elementType, _Scaling) \
506 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
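
/* Illustrative expansions, assuming the ScaleShift constants follow
   their names (ScaleShift16 == 1, ScaleShift32 == 2, ScaleShift64 == 3)
   and that Scaled is nonzero while Unscaled is zero, as the
   conditional above requires:

     SCALE (offset, 32)                -- offset << 2
     OPT_SCALE (offset, 64, Scaled)    -- offset << 3
     OPT_SCALE (offset, 64, Unscaled)  -- offset unshifted  */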
507
508 /* This can be used to zero or sign extend a 32 bit register derived
509 value to a 64 bit value. the first argument must be the value as
510 a uint32_t and the second must be either UXTW or SXTW. The result
511 is returned as an int64_t. */
512
513 static inline int64_t
514 extend (uint32_t value, Extension extension)
515 {
516 union
517 {
518 uint32_t u;
519 int32_t n;
520 } x;
521
522 /* A branchless variant of this ought to be possible. */
523 if (extension == UXTW || extension == NoExtension)
524 return value;
525
526 x.u = value;
527 return x.n;
528 }
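
/* Illustrative values, not part of the original file: the same 32 bit
   pattern widens differently depending on the requested extension,

     extend (0xffffffff, UXTW)   -- 0x00000000ffffffff
     extend (0xffffffff, SXTW)   -- 0xffffffffffffffff (i.e. -1)

   the union member read as int32_t supplying the sign extension.  */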
529
530 /* Scalar Floating Point
531
532 FP load/store single register (4 addressing modes)
533
534 N.B. the base register (source) can be the stack pointer.
535 The secondary source register (source2) can only be an Xn register. */
536
537 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
538 static void
539 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
540 {
541 unsigned rn = INSTR (9, 5);
542 unsigned st = INSTR (4, 0);
543 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
544
545 if (wb != Post)
546 address += offset;
547
548 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
549 if (wb == Post)
550 address += offset;
551
552 if (wb != NoWriteBack)
553 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
554 }
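
/* Note on the writeback pattern above, which recurs throughout this
   file: for pre-indexed forms such as LDR S0, [X1, #8]! the offset is
   added before the access and the sum written back; for post-indexed
   forms such as LDR S0, [X1], #8 the access uses the unmodified base
   and the offset is added to X1 afterwards; NoWriteBack leaves X1
   untouched.  */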
555
556 /* Load 8 bit with unsigned 12 bit offset. */
557 static void
558 fldrb_abs (sim_cpu *cpu, uint32_t offset)
559 {
560 unsigned rd = INSTR (4, 0);
561 unsigned rn = INSTR (9, 5);
562 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
563
564 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
565 }
566
567 /* Load 16 bit scaled unsigned 12 bit. */
568 static void
569 fldrh_abs (sim_cpu *cpu, uint32_t offset)
570 {
571 unsigned rd = INSTR (4, 0);
572 unsigned rn = INSTR (9, 5);
573 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
574
575 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
576 }
577
578 /* Load 32 bit scaled unsigned 12 bit. */
579 static void
580 fldrs_abs (sim_cpu *cpu, uint32_t offset)
581 {
582 unsigned rd = INSTR (4, 0);
583 unsigned rn = INSTR (9, 5);
584 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
585
586 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
587 }
588
589 /* Load 64 bit scaled unsigned 12 bit. */
590 static void
591 fldrd_abs (sim_cpu *cpu, uint32_t offset)
592 {
593 unsigned rd = INSTR (4, 0);
594 unsigned rn = INSTR (9, 5);
595 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
596
597 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
598 }
599
600 /* Load 128 bit scaled unsigned 12 bit. */
601 static void
602 fldrq_abs (sim_cpu *cpu, uint32_t offset)
603 {
604 unsigned rd = INSTR (4, 0);
605 unsigned rn = INSTR (9, 5);
606 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
607
608 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
609 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
610 }
611
612 /* Load 32 bit scaled or unscaled zero- or sign-extended
613 32-bit register offset. */
614 static void
615 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
616 {
617 unsigned rm = INSTR (20, 16);
618 unsigned rn = INSTR (9, 5);
619 unsigned st = INSTR (4, 0);
620 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
621 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
622 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
623
624 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
625 (cpu, address + displacement));
626 }
627
628 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
629 static void
630 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
631 {
632 unsigned rn = INSTR (9, 5);
633 unsigned st = INSTR (4, 0);
634 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
635
636 if (wb != Post)
637 address += offset;
638
639 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
640
641 if (wb == Post)
642 address += offset;
643
644 if (wb != NoWriteBack)
645 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
646 }
647
648 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
649 static void
650 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
651 {
652 unsigned rm = INSTR (20, 16);
653 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
654 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
655
656 fldrd_wb (cpu, displacement, NoWriteBack);
657 }
658
659 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
660 static void
661 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
662 {
663 FRegister a;
664 unsigned rn = INSTR (9, 5);
665 unsigned st = INSTR (4, 0);
666 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
667
668 if (wb != Post)
669 address += offset;
670
671 aarch64_get_mem_long_double (cpu, address, & a);
672 aarch64_set_FP_long_double (cpu, st, a);
673
674 if (wb == Post)
675 address += offset;
676
677 if (wb != NoWriteBack)
678 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
679 }
680
681 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
682 static void
683 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
684 {
685 unsigned rm = INSTR (20, 16);
686 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
687 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
688
689 fldrq_wb (cpu, displacement, NoWriteBack);
690 }
691
692 /* Memory Access
693
694 load-store single register
695 There are four addressing modes available here which all employ a
696 64 bit source (base) register.
697
698 N.B. the base register (source) can be the stack pointer.
699 The secondary source register (source2) can only be an Xn register.
700
701 Scaled, 12-bit, unsigned immediate offset, without pre- and
702 post-index options.
703 Unscaled, 9-bit, signed immediate offset with pre- or post-index
704 writeback.
705 scaled or unscaled 64-bit register offset.
706 scaled or unscaled 32-bit extended register offset.
707
708 All offsets are assumed to be raw from the decode, i.e. the
709 simulator is expected to adjust scaled offsets based on the
710 accessed data size. For register and extended register offset
711 versions the same applies, except that in the latter case the
712 operation may also require a sign extend.
713
714 A separate method is provided for each possible addressing mode. */
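
/* Editorial note: the handler suffixes used below map onto these
   modes -- _abs for the scaled unsigned 12-bit immediate, _wb for the
   signed 9-bit immediate with pre- or post-index writeback, and
   _scale_ext for the scaled or unscaled (extended) register offset
   forms.  */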
715
716 /* 32 bit load 32 bit scaled unsigned 12 bit */
717 static void
718 ldr32_abs (sim_cpu *cpu, uint32_t offset)
719 {
720 unsigned rn = INSTR (9, 5);
721 unsigned rt = INSTR (4, 0);
722
723 /* The target register may not be SP but the source may be. */
724 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
725 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
726 + SCALE (offset, 32)));
727 }
728
729 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
730 static void
731 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
732 {
733 unsigned rn = INSTR (9, 5);
734 unsigned rt = INSTR (4, 0);
735 uint64_t address;
736
737 if (rn == rt && wb != NoWriteBack)
738 HALT_UNALLOC;
739
740 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
741
742 if (wb != Post)
743 address += offset;
744
745 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
746
747 if (wb == Post)
748 address += offset;
749
750 if (wb != NoWriteBack)
751 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
752 }
753
754 /* 32 bit load 32 bit scaled or unscaled
755 zero- or sign-extended 32-bit register offset */
756 static void
757 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
758 {
759 unsigned rm = INSTR (20, 16);
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
762 /* rn may reference SP, rm and rt must reference ZR */
763
764 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
765 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
766 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
767
768 aarch64_set_reg_u64 (cpu, rt, NO_SP,
769 aarch64_get_mem_u32 (cpu, address + displacement));
770 }
771
772 /* 64 bit load 64 bit scaled unsigned 12 bit */
773 static void
774 ldr_abs (sim_cpu *cpu, uint32_t offset)
775 {
776 unsigned rn = INSTR (9, 5);
777 unsigned rt = INSTR (4, 0);
778
779 /* The target register may not be SP but the source may be. */
780 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
781 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
782 + SCALE (offset, 64)));
783 }
784
785 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
786 static void
787 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
788 {
789 unsigned rn = INSTR (9, 5);
790 unsigned rt = INSTR (4, 0);
791 uint64_t address;
792
793 if (rn == rt && wb != NoWriteBack)
794 HALT_UNALLOC;
795
796 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
797
798 if (wb != Post)
799 address += offset;
800
801 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
802
803 if (wb == Post)
804 address += offset;
805
806 if (wb != NoWriteBack)
807 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
808 }
809
810 /* 64 bit load 64 bit scaled or unscaled zero-
811 or sign-extended 32-bit register offset. */
812 static void
813 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
814 {
815 unsigned rm = INSTR (20, 16);
816 unsigned rn = INSTR (9, 5);
817 unsigned rt = INSTR (4, 0);
818 /* rn may reference SP, rm and rt must reference ZR */
819
820 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
821 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
822 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
823
824 aarch64_set_reg_u64 (cpu, rt, NO_SP,
825 aarch64_get_mem_u64 (cpu, address + displacement));
826 }
827
828 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
829 static void
830 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
831 {
832 unsigned rn = INSTR (9, 5);
833 unsigned rt = INSTR (4, 0);
834
835 /* The target register may not be SP but the source may be.
836 There is no scaling required for a byte load. */
837 aarch64_set_reg_u64 (cpu, rt, NO_SP,
838 aarch64_get_mem_u8
839 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
840 }
841
842 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
843 static void
844 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
845 {
846 unsigned rn = INSTR (9, 5);
847 unsigned rt = INSTR (4, 0);
848 uint64_t address;
849
850 if (rn == rt && wb != NoWriteBack)
851 HALT_UNALLOC;
852
853 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
854
855 if (wb != Post)
856 address += offset;
857
858 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
859
860 if (wb == Post)
861 address += offset;
862
863 if (wb != NoWriteBack)
864 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
865 }
866
867 /* 32 bit load zero-extended byte scaled or unscaled zero-
868 or sign-extended 32-bit register offset. */
869 static void
870 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
871 {
872 unsigned rm = INSTR (20, 16);
873 unsigned rn = INSTR (9, 5);
874 unsigned rt = INSTR (4, 0);
875 /* rn may reference SP, rm and rt must reference ZR */
876
877 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
878 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
879 extension);
880
881 /* There is no scaling required for a byte load. */
882 aarch64_set_reg_u64 (cpu, rt, NO_SP,
883 aarch64_get_mem_u8 (cpu, address + displacement));
884 }
885
886 /* 64 bit load sign-extended byte unscaled signed 9 bit
887 with pre- or post-writeback. */
888 static void
889 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
890 {
891 unsigned rn = INSTR (9, 5);
892 unsigned rt = INSTR (4, 0);
893 uint64_t address;
894 int64_t val;
895
896 if (rn == rt && wb != NoWriteBack)
897 HALT_UNALLOC;
898
899 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
900
901 if (wb != Post)
902 address += offset;
903
904 val = aarch64_get_mem_s8 (cpu, address);
905 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
906
907 if (wb == Post)
908 address += offset;
909
910 if (wb != NoWriteBack)
911 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
912 }
913
914 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
915 static void
916 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
917 {
918 ldrsb_wb (cpu, offset, NoWriteBack);
919 }
920
921 /* 64 bit load sign-extended byte scaled or unscaled zero-
922 or sign-extended 32-bit register offset. */
923 static void
924 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
925 {
926 unsigned rm = INSTR (20, 16);
927 unsigned rn = INSTR (9, 5);
928 unsigned rt = INSTR (4, 0);
929 /* rn may reference SP, rm and rt must reference ZR */
930
931 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
932 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
933 extension);
934 /* There is no scaling required for a byte load. */
935 aarch64_set_reg_s64 (cpu, rt, NO_SP,
936 aarch64_get_mem_s8 (cpu, address + displacement));
937 }
938
939 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
940 static void
941 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
942 {
943 unsigned rn = INSTR (9, 5);
944 unsigned rt = INSTR (4, 0);
945 uint32_t val;
946
947 /* The target register may not be SP but the source may be. */
948 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
949 + SCALE (offset, 16));
950 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
951 }
952
953 /* 32 bit load zero-extended short unscaled signed 9 bit
954 with pre- or post-writeback. */
955 static void
956 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
957 {
958 unsigned rn = INSTR (9, 5);
959 unsigned rt = INSTR (4, 0);
960 uint64_t address;
961
962 if (rn == rt && wb != NoWriteBack)
963 HALT_UNALLOC;
964
965 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
966
967 if (wb != Post)
968 address += offset;
969
970 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
971
972 if (wb == Post)
973 address += offset;
974
975 if (wb != NoWriteBack)
976 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
977 }
978
979 /* 32 bit load zero-extended short scaled or unscaled zero-
980 or sign-extended 32-bit register offset. */
981 static void
982 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
983 {
984 unsigned rm = INSTR (20, 16);
985 unsigned rn = INSTR (9, 5);
986 unsigned rt = INSTR (4, 0);
987 /* rn may reference SP, rm and rt must reference ZR */
988
989 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
990 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
991 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
992
993 aarch64_set_reg_u32 (cpu, rt, NO_SP,
994 aarch64_get_mem_u16 (cpu, address + displacement));
995 }
996
997 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
998 static void
999 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1000 {
1001 unsigned rn = INSTR (9, 5);
1002 unsigned rt = INSTR (4, 0);
1003 int32_t val;
1004
1005 /* The target register may not be SP but the source may be. */
1006 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1007 + SCALE (offset, 16));
1008 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1009 }
1010
1011 /* 32 bit load sign-extended short unscaled signed 9 bit
1012 with pre- or post-writeback. */
1013 static void
1014 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1015 {
1016 unsigned rn = INSTR (9, 5);
1017 unsigned rt = INSTR (4, 0);
1018 uint64_t address;
1019
1020 if (rn == rt && wb != NoWriteBack)
1021 HALT_UNALLOC;
1022
1023 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1024
1025 if (wb != Post)
1026 address += offset;
1027
1028 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1029 (int32_t) aarch64_get_mem_s16 (cpu, address));
1030
1031 if (wb == Post)
1032 address += offset;
1033
1034 if (wb != NoWriteBack)
1035 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1036 }
1037
1038 /* 32 bit load sign-extended short scaled or unscaled zero-
1039 or sign-extended 32-bit register offset. */
1040 static void
1041 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1042 {
1043 unsigned rm = INSTR (20, 16);
1044 unsigned rn = INSTR (9, 5);
1045 unsigned rt = INSTR (4, 0);
1046 /* rn may reference SP, rm and rt must reference ZR */
1047
1048 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1049 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1050 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1051
1052 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1053 (int32_t) aarch64_get_mem_s16
1054 (cpu, address + displacement));
1055 }
1056
1057 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1058 static void
1059 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1060 {
1061 unsigned rn = INSTR (9, 5);
1062 unsigned rt = INSTR (4, 0);
1063 int64_t val;
1064
1065 /* The target register may not be SP but the source may be. */
1066 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1067 + SCALE (offset, 16));
1068 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1069 }
1070
1071 /* 64 bit load sign-extended short unscaled signed 9 bit
1072 with pre- or post-writeback. */
1073 static void
1074 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1075 {
1076 unsigned rn = INSTR (9, 5);
1077 unsigned rt = INSTR (4, 0);
1078 uint64_t address;
1079 int64_t val;
1080
1081 if (rn == rt && wb != NoWriteBack)
1082 HALT_UNALLOC;
1083
1084 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1085
1086 if (wb != Post)
1087 address += offset;
1088
1089 val = aarch64_get_mem_s16 (cpu, address);
1090 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1091
1092 if (wb == Post)
1093 address += offset;
1094
1095 if (wb != NoWriteBack)
1096 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1097 }
1098
1099 /* 64 bit load sign-extended short scaled or unscaled zero-
1100 or sign-extended 32-bit register offset. */
1101 static void
1102 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1103 {
1104 unsigned rm = INSTR (20, 16);
1105 unsigned rn = INSTR (9, 5);
1106 unsigned rt = INSTR (4, 0);
1107
1108 /* rn may reference SP, rm and rt must reference ZR */
1109
1110 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1111 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1112 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1113 int64_t val;
1114
1115 val = aarch64_get_mem_s16 (cpu, address + displacement);
1116 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1117 }
1118
1119 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1120 static void
1121 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1122 {
1123 unsigned rn = INSTR (9, 5);
1124 unsigned rt = INSTR (4, 0);
1125 int64_t val;
1126
1127 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1128 + SCALE (offset, 32));
1129 /* The target register may not be SP but the source may be. */
1130 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1131 }
1132
1133 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1134 with pre- or post-writeback. */
1135 static void
1136 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1137 {
1138 unsigned rn = INSTR (9, 5);
1139 unsigned rt = INSTR (4, 0);
1140 uint64_t address;
1141
1142 if (rn == rt && wb != NoWriteBack)
1143 HALT_UNALLOC;
1144
1145 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1146
1147 if (wb != Post)
1148 address += offset;
1149
1150 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1151
1152 if (wb == Post)
1153 address += offset;
1154
1155 if (wb != NoWriteBack)
1156 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1157 }
1158
1159 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1160 or sign-extended 32-bit register offset. */
1161 static void
1162 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1163 {
1164 unsigned rm = INSTR (20, 16);
1165 unsigned rn = INSTR (9, 5);
1166 unsigned rt = INSTR (4, 0);
1167 /* rn may reference SP, rm and rt must reference ZR */
1168
1169 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1170 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1171 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1172
1173 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1174 aarch64_get_mem_s32 (cpu, address + displacement));
1175 }
1176
1177 /* N.B. with stores the value in source is written to the
1178 address identified by source2 modified by source3/offset. */
1179
1180 /* 32 bit store scaled unsigned 12 bit. */
1181 static void
1182 str32_abs (sim_cpu *cpu, uint32_t offset)
1183 {
1184 unsigned rn = INSTR (9, 5);
1185 unsigned rt = INSTR (4, 0);
1186
1187 /* The target register may not be SP but the source may be. */
1188 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1189 + SCALE (offset, 32)),
1190 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1191 }
1192
1193 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1194 static void
1195 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1196 {
1197 unsigned rn = INSTR (9, 5);
1198 unsigned rt = INSTR (4, 0);
1199 uint64_t address;
1200
1201 if (rn == rt && wb != NoWriteBack)
1202 HALT_UNALLOC;
1203
1204 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1205 if (wb != Post)
1206 address += offset;
1207
1208 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1209
1210 if (wb == Post)
1211 address += offset;
1212
1213 if (wb != NoWriteBack)
1214 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1215 }
1216
1217 /* 32 bit store scaled or unscaled zero- or
1218 sign-extended 32-bit register offset. */
1219 static void
1220 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1221 {
1222 unsigned rm = INSTR (20, 16);
1223 unsigned rn = INSTR (9, 5);
1224 unsigned rt = INSTR (4, 0);
1225
1226 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1227 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1228 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1229
1230 aarch64_set_mem_u32 (cpu, address + displacement,
1231 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1232 }
1233
1234 /* 64 bit store scaled unsigned 12 bit. */
1235 static void
1236 str_abs (sim_cpu *cpu, uint32_t offset)
1237 {
1238 unsigned rn = INSTR (9, 5);
1239 unsigned rt = INSTR (4, 0);
1240
1241 aarch64_set_mem_u64 (cpu,
1242 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1243 + SCALE (offset, 64),
1244 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1245 }
1246
1247 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1248 static void
1249 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1250 {
1251 unsigned rn = INSTR (9, 5);
1252 unsigned rt = INSTR (4, 0);
1253 uint64_t address;
1254
1255 if (rn == rt && wb != NoWriteBack)
1256 HALT_UNALLOC;
1257
1258 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1259
1260 if (wb != Post)
1261 address += offset;
1262
1263 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1264
1265 if (wb == Post)
1266 address += offset;
1267
1268 if (wb != NoWriteBack)
1269 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1270 }
1271
1272 /* 64 bit store scaled or unscaled zero-
1273 or sign-extended 32-bit register offset. */
1274 static void
1275 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1276 {
1277 unsigned rm = INSTR (20, 16);
1278 unsigned rn = INSTR (9, 5);
1279 unsigned rt = INSTR (4, 0);
1280 /* rn may reference SP, rm and rt must reference ZR */
1281
1282 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1283 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1284 extension);
1285 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1286
1287 aarch64_set_mem_u64 (cpu, address + displacement,
1288 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1289 }
1290
1291 /* 32 bit store byte scaled unsigned 12 bit. */
1292 static void
1293 strb_abs (sim_cpu *cpu, uint32_t offset)
1294 {
1295 unsigned rn = INSTR (9, 5);
1296 unsigned rt = INSTR (4, 0);
1297
1298 /* The target register may not be SP but the source may be.
1299 There is no scaling required for a byte store. */
1300 aarch64_set_mem_u8 (cpu,
1301 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1302 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1303 }
1304
1305 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1306 static void
1307 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1308 {
1309 unsigned rn = INSTR (9, 5);
1310 unsigned rt = INSTR (4, 0);
1311 uint64_t address;
1312
1313 if (rn == rt && wb != NoWriteBack)
1314 HALT_UNALLOC;
1315
1316 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1317
1318 if (wb != Post)
1319 address += offset;
1320
1321 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1322
1323 if (wb == Post)
1324 address += offset;
1325
1326 if (wb != NoWriteBack)
1327 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1328 }
1329
1330 /* 32 bit store byte scaled or unscaled zero-
1331 or sign-extended 32-bit register offset. */
1332 static void
1333 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1334 {
1335 unsigned rm = INSTR (20, 16);
1336 unsigned rn = INSTR (9, 5);
1337 unsigned rt = INSTR (4, 0);
1338 /* rn may reference SP, rm and rt must reference ZR */
1339
1340 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1341 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1342 extension);
1343
1344 /* There is no scaling required for a byte store. */
1345 aarch64_set_mem_u8 (cpu, address + displacement,
1346 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1347 }
1348
1349 /* 32 bit store short scaled unsigned 12 bit. */
1350 static void
1351 strh_abs (sim_cpu *cpu, uint32_t offset)
1352 {
1353 unsigned rn = INSTR (9, 5);
1354 unsigned rt = INSTR (4, 0);
1355
1356 /* The target register may not be SP but the source may be. */
1357 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1358 + SCALE (offset, 16),
1359 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1360 }
1361
1362 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1363 static void
1364 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1365 {
1366 unsigned rn = INSTR (9, 5);
1367 unsigned rt = INSTR (4, 0);
1368 uint64_t address;
1369
1370 if (rn == rt && wb != NoWriteBack)
1371 HALT_UNALLOC;
1372
1373 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1374
1375 if (wb != Post)
1376 address += offset;
1377
1378 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1379
1380 if (wb == Post)
1381 address += offset;
1382
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1385 }
1386
1387 /* 32 bit store short scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1389 static void
1390 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1391 {
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1396
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1399 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1400
1401 aarch64_set_mem_u16 (cpu, address + displacement,
1402 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1403 }
1404
1405 /* Prefetch unsigned 12 bit. */
1406 static void
1407 prfm_abs (sim_cpu *cpu, uint32_t offset)
1408 {
1409 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1410 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1411 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1412 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1413 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1414 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1415 ow ==> UNALLOC
1416 PrfOp prfop = prfop (instr, 4, 0);
1417 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1418 + SCALE (offset, 64). */
1419
1420 /* TODO : implement prefetch of address. */
1421 }
1422
1423 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1424 static void
1425 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1426 {
1427 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1428 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1429 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1430 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1431 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1432 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1433 ow ==> UNALLOC
1434 rn may reference SP, rm may only reference ZR
1435 PrfOp prfop = prfop (instr, 4, 0);
1436 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1437 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1438 extension);
1439 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1440 uint64_t address = base + displacement. */
1441
1442 /* TODO : implement prefetch of address */
1443 }
1444
1445 /* 64 bit pc-relative prefetch. */
1446 static void
1447 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1448 {
1449 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1450 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1451 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1452 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1453 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1454 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1455 ow ==> UNALLOC
1456 PrfOp prfop = prfop (instr, 4, 0);
1457 uint64_t address = aarch64_get_PC (cpu) + offset. */
1458
1459 /* TODO : implement this */
1460 }
1461
1462 /* Load-store exclusive. */
1463
1464 static void
1465 ldxr (sim_cpu *cpu)
1466 {
1467 unsigned rn = INSTR (9, 5);
1468 unsigned rt = INSTR (4, 0);
1469 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1470 int size = INSTR (31, 30);
1471 /* int ordered = INSTR (15, 15); */
1472 /* int exclusive = ! INSTR (23, 23); */
1473
1474 switch (size)
1475 {
1476 case 0:
1477 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1478 break;
1479 case 1:
1480 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1481 break;
1482 case 2:
1483 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1484 break;
1485 case 3:
1486 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1487 break;
1488 }
1489 }
1490
1491 static void
1492 stxr (sim_cpu *cpu)
1493 {
1494 unsigned rn = INSTR (9, 5);
1495 unsigned rt = INSTR (4, 0);
1496 unsigned rs = INSTR (20, 16);
1497 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int size = INSTR (31, 30);
1499 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1500
1501 switch (size)
1502 {
1503 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1504 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1505 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1506 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1507 }
1508
1509 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always succeeds - no exclusive monitor is modelled. */
1510 }
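
/* Editorial note: since the simulator models a single CPU and no
   exclusive monitor, STXR always reports success (status 0 in rs), so
   a typical retry loop such as

     retry:
       ldxr  x0, [x2]
       add   x0, x0, #1
       stxr  w1, x0, [x2]
       cbnz  w1, retry

   always completes on its first iteration here.  */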
1511
1512 static void
1513 dexLoadLiteral (sim_cpu *cpu)
1514 {
1515 /* instr[29,27] == 011
1516 instr[25,24] == 00
1517 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1518 010 ==> LDRX, 011 ==> FLDRD
1519 100 ==> LDRSW, 101 ==> FLDRQ
1520 110 ==> PRFM, 111 ==> UNALLOC
1521 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1522 instr[23, 5] == simm19 */
1523
1524 /* unsigned rt = INSTR (4, 0); */
1525 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1526 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1527
1528 switch (dispatch)
1529 {
1530 case 0: ldr32_pcrel (cpu, imm); break;
1531 case 1: fldrs_pcrel (cpu, imm); break;
1532 case 2: ldr_pcrel (cpu, imm); break;
1533 case 3: fldrd_pcrel (cpu, imm); break;
1534 case 4: ldrsw_pcrel (cpu, imm); break;
1535 case 5: fldrq_pcrel (cpu, imm); break;
1536 case 6: prfm_pcrel (cpu, imm); break;
1537 case 7:
1538 default:
1539 HALT_UNALLOC;
1540 }
1541 }
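
/* Worked example, not part of the original file: for LDR Xt, <label>
   the fields are opc == 01 (instr[31,30]) and V == 0 (instr[26]), so

     dispatch = (1 << 1) | 0 == 2

   selecting ldr_pcrel; the same opc with V == 1 gives 3, i.e.
   fldrd_pcrel.  The simm19 word offset extracted above is scaled by 4
   inside the individual handlers.  */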
1542
1543 /* Immediate arithmetic
1544 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1545 value left shifted by 12 bits (done at decode).
1546
1547 N.B. the register args (dest, source) can normally be Xn or SP.
1548 The exception occurs for flag setting instructions which may
1549 only use Xn for the output (dest). */
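
/* Illustrative example, not part of the original file: an instruction
   such as ADD X0, X1, #1, LSL #12 reaches add64 with aimm already
   equal to 0x1000 (the shift having been applied at decode), so the
   handler simply computes X0 = X1 + 0x1000.  */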
1550
1551 /* 32 bit add immediate. */
1552 static void
1553 add32 (sim_cpu *cpu, uint32_t aimm)
1554 {
1555 unsigned rn = INSTR (9, 5);
1556 unsigned rd = INSTR (4, 0);
1557
1558 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1559 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1560 }
1561
1562 /* 64 bit add immediate. */
1563 static void
1564 add64 (sim_cpu *cpu, uint32_t aimm)
1565 {
1566 unsigned rn = INSTR (9, 5);
1567 unsigned rd = INSTR (4, 0);
1568
1569 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1570 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1571 }
1572
1573 static void
1574 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1575 {
1576 int32_t result = value1 + value2;
1577 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1578 uint64_t uresult = (uint64_t)(uint32_t) value1
1579 + (uint64_t)(uint32_t) value2;
1580 uint32_t flags = 0;
1581
1582 if (result == 0)
1583 flags |= Z;
1584
1585 if (result & (1 << 31))
1586 flags |= N;
1587
1588 if (uresult != (uint64_t)(uint32_t) result)
1589 flags |= C;
1590
1591 if (sresult != (int64_t) result)
1592 flags |= V;
1593
1594 aarch64_set_CPSR (cpu, flags);
1595 }
1596
1597 static void
1598 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1599 {
1600 uint64_t result = value1 + value2;
1601 uint64_t signbit = 1ULL << 63;
1602 uint32_t flags = 0;
1603
1604 if (result == 0)
1605 flags |= Z;
1606
1607 if (result & signbit)
1608 flags |= N;
1609
1610 /* An unsigned 64 bit addition carries out iff it wraps, i.e.
1611 iff the result compares lower than either operand. */
1612 if (result < value1)
1613 flags |= C;
1614
1615 /* Signed overflow occurs iff both operands have the same sign
1616 and the sign of the result differs from it. */
1617 if (~(value1 ^ value2) & (value1 ^ result) & signbit)
1618 flags |= V;
1619
1620 aarch64_set_CPSR (cpu, flags);
1621 }
1642
1643 #define NEG(a) (((a) & signbit) == signbit)
1644 #define POS(a) (((a) & signbit) == 0)
1645
1646 static void
1647 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1648 {
1649 uint32_t result = value1 - value2;
1650 uint32_t flags = 0;
1651 uint32_t signbit = 1U << 31;
1652
1653 if (result == 0)
1654 flags |= Z;
1655
1656 if (NEG (result))
1657 flags |= N;
1658
1659 if ( (NEG (value1) && POS (value2))
1660 || (NEG (value1) && POS (result))
1661 || (POS (value2) && POS (result)))
1662 flags |= C;
1663
1664 if ( (NEG (value1) && POS (value2) && POS (result))
1665 || (POS (value1) && NEG (value2) && NEG (result)))
1666 flags |= V;
1667
1668 aarch64_set_CPSR (cpu, flags);
1669 }
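
/* Editorial note: the carry term above implements the AArch64
   convention that C means "no borrow", i.e. the carry out of
   value1 + ~value2 + 1.  For example:

     set_flags_for_sub32 (cpu, 5, 3)   -- result 2, C set (no borrow)
     set_flags_for_sub32 (cpu, 3, 5)   -- result -2, N set, C clear  */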
1670
1671 static void
1672 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1673 {
1674 uint64_t result = value1 - value2;
1675 uint32_t flags = 0;
1676 uint64_t signbit = 1ULL << 63;
1677
1678 if (result == 0)
1679 flags |= Z;
1680
1681 if (NEG (result))
1682 flags |= N;
1683
1684 if ( (NEG (value1) && POS (value2))
1685 || (NEG (value1) && POS (result))
1686 || (POS (value2) && POS (result)))
1687 flags |= C;
1688
1689 if ( (NEG (value1) && POS (value2) && POS (result))
1690 || (POS (value1) && NEG (value2) && NEG (result)))
1691 flags |= V;
1692
1693 aarch64_set_CPSR (cpu, flags);
1694 }
1695
1696 static void
1697 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1698 {
1699 uint32_t flags = 0;
1700
1701 if (result == 0)
1702 flags |= Z;
1703 else
1704 flags &= ~ Z;
1705
1706 if (result & (1 << 31))
1707 flags |= N;
1708 else
1709 flags &= ~ N;
1710
1711 aarch64_set_CPSR (cpu, flags);
1712 }
1713
1714 static void
1715 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1716 {
1717 uint32_t flags = 0;
1718
1719 if (result == 0)
1720 flags |= Z;
1721 else
1722 flags &= ~ Z;
1723
1724 if (result & (1ULL << 63))
1725 flags |= N;
1726 else
1727 flags &= ~ N;
1728
1729 aarch64_set_CPSR (cpu, flags);
1730 }
1731
1732 /* 32 bit add immediate set flags. */
1733 static void
1734 adds32 (sim_cpu *cpu, uint32_t aimm)
1735 {
1736 unsigned rn = INSTR (9, 5);
1737 unsigned rd = INSTR (4, 0);
1738 /* TODO : do we need to worry about signs here? */
1739 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1740
1741 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1742 set_flags_for_add32 (cpu, value1, aimm);
1743 }
1744
1745 /* 64 bit add immediate set flags. */
1746 static void
1747 adds64 (sim_cpu *cpu, uint32_t aimm)
1748 {
1749 unsigned rn = INSTR (9, 5);
1750 unsigned rd = INSTR (4, 0);
1751 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1752 uint64_t value2 = aimm;
1753
1754 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1755 set_flags_for_add64 (cpu, value1, value2);
1756 }
1757
1758 /* 32 bit sub immediate. */
1759 static void
1760 sub32 (sim_cpu *cpu, uint32_t aimm)
1761 {
1762 unsigned rn = INSTR (9, 5);
1763 unsigned rd = INSTR (4, 0);
1764
1765 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1766 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1767 }
1768
1769 /* 64 bit sub immediate. */
1770 static void
1771 sub64 (sim_cpu *cpu, uint32_t aimm)
1772 {
1773 unsigned rn = INSTR (9, 5);
1774 unsigned rd = INSTR (4, 0);
1775
1776 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1777 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1778 }
1779
1780 /* 32 bit sub immediate set flags. */
1781 static void
1782 subs32 (sim_cpu *cpu, uint32_t aimm)
1783 {
1784 unsigned rn = INSTR (9, 5);
1785 unsigned rd = INSTR (4, 0);
1786 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
1787 uint32_t value2 = aimm;
1788
1789 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1790 set_flags_for_sub32 (cpu, value1, value2);
1791 }
1792
1793 /* 64 bit sub immediate set flags. */
1794 static void
1795 subs64 (sim_cpu *cpu, uint32_t aimm)
1796 {
1797 unsigned rn = INSTR (9, 5);
1798 unsigned rd = INSTR (4, 0);
1799 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1800 uint32_t value2 = aimm;
1801
1802 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1803 set_flags_for_sub64 (cpu, value1, value2);
1804 }
1805
1806 /* Data Processing Register. */
1807
1808 /* First two helpers to perform the shift operations. */
1809
1810 static inline uint32_t
1811 shifted32 (uint32_t value, Shift shift, uint32_t count)
1812 {
1813 switch (shift)
1814 {
1815 default:
1816 case LSL:
1817 return (value << count);
1818 case LSR:
1819 return (value >> count);
1820 case ASR:
1821 {
1822 int32_t svalue = value;
1823 return (svalue >> count);
1824 }
1825 case ROR:
1826 {
1827 uint32_t top = value >> count;
1828 uint32_t bottom = value << ((32 - count) & 31); /* Avoid an undefined shift by 32 when count is 0. */
1829 return (bottom | top);
1830 }
1831 }
1832 }
1833
1834 static inline uint64_t
1835 shifted64 (uint64_t value, Shift shift, uint32_t count)
1836 {
1837 switch (shift)
1838 {
1839 default:
1840 case LSL:
1841 return (value << count);
1842 case LSR:
1843 return (value >> count);
1844 case ASR:
1845 {
1846 int64_t svalue = value;
1847 return (svalue >> count);
1848 }
1849 case ROR:
1850 {
1851 uint64_t top = value >> count;
1852 uint64_t bottom = value << ((64 - count) & 63); /* Avoid an undefined shift by 64 when count is 0. */
1853 return (bottom | top);
1854 }
1855 }
1856 }
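
/* Illustrative values, not part of the original file:

     shifted32 (0x80000001, LSR, 1)   -- 0x40000000
     shifted32 (0x80000001, ASR, 1)   -- 0xc0000000 (sign bit copied)
     shifted32 (0x80000001, ROR, 1)   -- 0xc0000000 (bit 0 rotated up)  */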
1857
1858 /* Arithmetic shifted register.
1859 These allow an optional LSL, ASR or LSR to the second source
1860 register with a count up to the register bit count.
1861
1862 N.B register args may not be SP. */
1863
1864 /* 32 bit ADD shifted register. */
1865 static void
1866 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1867 {
1868 unsigned rm = INSTR (20, 16);
1869 unsigned rn = INSTR (9, 5);
1870 unsigned rd = INSTR (4, 0);
1871
1872 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1873 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1874 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1875 shift, count));
1876 }
1877
1878 /* 64 bit ADD shifted register. */
1879 static void
1880 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1881 {
1882 unsigned rm = INSTR (20, 16);
1883 unsigned rn = INSTR (9, 5);
1884 unsigned rd = INSTR (4, 0);
1885
1886 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1887 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1888 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1889 shift, count));
1890 }
1891
1892 /* 32 bit ADD shifted register setting flags. */
1893 static void
1894 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1895 {
1896 unsigned rm = INSTR (20, 16);
1897 unsigned rn = INSTR (9, 5);
1898 unsigned rd = INSTR (4, 0);
1899
1900 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1901 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1902 shift, count);
1903
1904 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1905 set_flags_for_add32 (cpu, value1, value2);
1906 }
1907
1908 /* 64 bit ADD shifted register setting flags. */
1909 static void
1910 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1911 {
1912 unsigned rm = INSTR (20, 16);
1913 unsigned rn = INSTR (9, 5);
1914 unsigned rd = INSTR (4, 0);
1915
1916 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1917 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1918 shift, count);
1919
1920 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1921 set_flags_for_add64 (cpu, value1, value2);
1922 }
1923
1924 /* 32 bit SUB shifted register. */
1925 static void
1926 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1927 {
1928 unsigned rm = INSTR (20, 16);
1929 unsigned rn = INSTR (9, 5);
1930 unsigned rd = INSTR (4, 0);
1931
1932 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1933 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1934 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1935 shift, count));
1936 }
1937
1938 /* 64 bit SUB shifted register. */
1939 static void
1940 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1941 {
1942 unsigned rm = INSTR (20, 16);
1943 unsigned rn = INSTR (9, 5);
1944 unsigned rd = INSTR (4, 0);
1945
1946 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1947 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1948 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1949 shift, count));
1950 }
1951
1952 /* 32 bit SUB shifted register setting flags. */
1953 static void
1954 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1955 {
1956 unsigned rm = INSTR (20, 16);
1957 unsigned rn = INSTR (9, 5);
1958 unsigned rd = INSTR (4, 0);
1959
1960 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1961 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1962 shift, count);
1963
1964 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1965 set_flags_for_sub32 (cpu, value1, value2);
1966 }
1967
1968 /* 64 bit SUB shifted register setting flags. */
1969 static void
1970 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1971 {
1972 unsigned rm = INSTR (20, 16);
1973 unsigned rn = INSTR (9, 5);
1974 unsigned rd = INSTR (4, 0);
1975
1976 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1977 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1978 shift, count);
1979
1980 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1981 set_flags_for_sub64 (cpu, value1, value2);
1982 }
1983
1984 /* First a couple more helpers to fetch the
1985 relevant source register element either
1986 sign or zero extended as required by the
1987 extension value. */
1988
1989 static uint32_t
1990 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
1991 {
1992 switch (extension)
1993 {
1994 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
1995 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
1996 case UXTW: /* Fall through. */
1997 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
1998 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
1999 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2000 case SXTW: /* Fall through. */
2001 case SXTX: /* Fall through. */
2002 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2003 }
2004 }
2005
2006 static uint64_t
2007 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2008 {
2009 switch (extension)
2010 {
2011 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2012 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2013 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2014 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2015 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2016 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2017 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2018 case SXTX:
2019 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2020 }
2021 }
2022
2023 /* Arithmetic extending register
2024 These allow an optional sign extension of some portion of the
2025 second source register followed by an optional left shift of
2026 up to 4 bits (i.e. a shift amount in the range 0 to 4).
2027
2028 N.B output (dest) and first input arg (source) may normally be Xn
2029 or SP. However, for flag setting operations dest can only be
2030 Xn. Second input registers are always Xn. */
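
/* For example ADD X3, SP, W5, UXTW #2 computes
   X3 = SP + (zero-extended W5 << 2), the usual scaled array-index
   idiom; dexAddSubtractExtendedRegister dispatches it below as
   add64_ext (cpu, UXTW, 2).  */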
2031
2032 /* 32 bit ADD extending register. */
2033 static void
2034 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2035 {
2036 unsigned rm = INSTR (20, 16);
2037 unsigned rn = INSTR (9, 5);
2038 unsigned rd = INSTR (4, 0);
2039
2040 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2041 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2042 + (extreg32 (cpu, rm, extension) << shift));
2043 }
2044
2045 /* 64 bit ADD extending register.
2046 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2047 static void
2048 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2049 {
2050 unsigned rm = INSTR (20, 16);
2051 unsigned rn = INSTR (9, 5);
2052 unsigned rd = INSTR (4, 0);
2053
2054 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2055 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2056 + (extreg64 (cpu, rm, extension) << shift));
2057 }
2058
2059 /* 32 bit ADD extending register setting flags. */
2060 static void
2061 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2062 {
2063 unsigned rm = INSTR (20, 16);
2064 unsigned rn = INSTR (9, 5);
2065 unsigned rd = INSTR (4, 0);
2066
2067 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2068 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2069
2070 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2071 set_flags_for_add32 (cpu, value1, value2);
2072 }
2073
2074 /* 64 bit ADD extending register setting flags */
2075 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2076 static void
2077 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2078 {
2079 unsigned rm = INSTR (20, 16);
2080 unsigned rn = INSTR (9, 5);
2081 unsigned rd = INSTR (4, 0);
2082
2083 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2084 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2085
2086 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2087 set_flags_for_add64 (cpu, value1, value2);
2088 }
2089
2090 /* 32 bit SUB extending register. */
2091 static void
2092 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2099 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2100 - (extreg32 (cpu, rm, extension) << shift));
2101 }
2102
2103 /* 64 bit SUB extending register. */
2104 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2105 static void
2106 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2107 {
2108 unsigned rm = INSTR (20, 16);
2109 unsigned rn = INSTR (9, 5);
2110 unsigned rd = INSTR (4, 0);
2111
2112 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2113 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2114 - (extreg64 (cpu, rm, extension) << shift));
2115 }
2116
2117 /* 32 bit SUB extending register setting flags. */
2118 static void
2119 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2120 {
2121 unsigned rm = INSTR (20, 16);
2122 unsigned rn = INSTR (9, 5);
2123 unsigned rd = INSTR (4, 0);
2124
2125 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2126 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2127
2128 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2129 set_flags_for_sub32 (cpu, value1, value2);
2130 }
2131
2132 /* 64 bit SUB extending register setting flags */
2133 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2134 static void
2135 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2136 {
2137 unsigned rm = INSTR (20, 16);
2138 unsigned rn = INSTR (9, 5);
2139 unsigned rd = INSTR (4, 0);
2140
2141 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2142 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2143
2144 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2145 set_flags_for_sub64 (cpu, value1, value2);
2146 }
2147
2148 static void
2149 dexAddSubtractImmediate (sim_cpu *cpu)
2150 {
2151 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2152 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2153 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2154 instr[28,24] = 10001
2155 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2156 instr[21,10] = uimm12
2157 instr[9,5] = Rn
2158 instr[4,0] = Rd */
2159
2160 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2161 uint32_t shift = INSTR (23, 22);
2162 uint32_t imm = INSTR (21, 10);
2163 uint32_t dispatch = INSTR (31, 29);
2164
2165 NYI_assert (28, 24, 0x11);
2166
2167 if (shift > 1)
2168 HALT_UNALLOC;
2169
2170 if (shift)
2171 imm <<= 12;
2172
2173 switch (dispatch)
2174 {
2175 case 0: add32 (cpu, imm); break;
2176 case 1: adds32 (cpu, imm); break;
2177 case 2: sub32 (cpu, imm); break;
2178 case 3: subs32 (cpu, imm); break;
2179 case 4: add64 (cpu, imm); break;
2180 case 5: adds64 (cpu, imm); break;
2181 case 6: sub64 (cpu, imm); break;
2182 case 7: subs64 (cpu, imm); break;
2183 }
2184 }
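
/* E.g. ADD W1, W2, #1, LSL #12 reaches this decoder with shift = 1
   and imm = 1, so add32 receives the already-shifted value 0x1000.  */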
2185
2186 static void
2187 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2188 {
2189 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2190 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2191 instr[28,24] = 01011
2192 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2193 instr[21] = 0
2194 instr[20,16] = Rm
2195 instr[15,10] = count : must be 0xxxxx for 32 bit
2196 instr[9,5] = Rn
2197 instr[4,0] = Rd */
2198
2199 uint32_t size = INSTR (31, 31);
2200 uint32_t count = INSTR (15, 10);
2201 Shift shiftType = INSTR (23, 22);
2202
2203 NYI_assert (28, 24, 0x0B);
2204 NYI_assert (21, 21, 0);
2205
2206 /* Shift encoded as ROR is unallocated. */
2207 if (shiftType == ROR)
2208 HALT_UNALLOC;
2209
2210 /* 32 bit operations must have count[5] = 0
2211 or else we have an UNALLOC. */
2212 if (size == 0 && uimm (count, 5, 5))
2213 HALT_UNALLOC;
2214
2215 /* Dispatch on size:op i.e instr [31,29]. */
2216 switch (INSTR (31, 29))
2217 {
2218 case 0: add32_shift (cpu, shiftType, count); break;
2219 case 1: adds32_shift (cpu, shiftType, count); break;
2220 case 2: sub32_shift (cpu, shiftType, count); break;
2221 case 3: subs32_shift (cpu, shiftType, count); break;
2222 case 4: add64_shift (cpu, shiftType, count); break;
2223 case 5: adds64_shift (cpu, shiftType, count); break;
2224 case 6: sub64_shift (cpu, shiftType, count); break;
2225 case 7: subs64_shift (cpu, shiftType, count); break;
2226 }
2227 }
2228
2229 static void
2230 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2231 {
2232 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2233 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2234 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2235 instr[28,24] = 01011
2236 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2237 instr[21] = 1
2238 instr[20,16] = Rm
2239 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2240 010 ==> UXTW|LSL, 011 ==> UXTX,
2241 100 ==> SXTB, 101 ==> SXTH,
2242 110 ==> SXTW, 111 ==> SXTX,
2243 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2244 instr[9,5] = Rn
2245 instr[4,0] = Rd */
2246
2247 Extension extensionType = INSTR (15, 13);
2248 uint32_t shift = INSTR (12, 10);
2249
2250 NYI_assert (28, 24, 0x0B);
2251 NYI_assert (21, 21, 1);
2252
2253 /* Shift may not exceed 4. */
2254 if (shift > 4)
2255 HALT_UNALLOC;
2256
2257 /* Dispatch on size:op:set?. */
2258 switch (INSTR (31, 29))
2259 {
2260 case 0: add32_ext (cpu, extensionType, shift); break;
2261 case 1: adds32_ext (cpu, extensionType, shift); break;
2262 case 2: sub32_ext (cpu, extensionType, shift); break;
2263 case 3: subs32_ext (cpu, extensionType, shift); break;
2264 case 4: add64_ext (cpu, extensionType, shift); break;
2265 case 5: adds64_ext (cpu, extensionType, shift); break;
2266 case 6: sub64_ext (cpu, extensionType, shift); break;
2267 case 7: subs64_ext (cpu, extensionType, shift); break;
2268 }
2269 }
2270
2271 /* Conditional data processing
2272 Condition register is implicit 3rd source. */
2273
2274 /* 32 bit add with carry. */
2275 /* N.B register args may not be SP. */
2276
2277 static void
2278 adc32 (sim_cpu *cpu)
2279 {
2280 unsigned rm = INSTR (20, 16);
2281 unsigned rn = INSTR (9, 5);
2282 unsigned rd = INSTR (4, 0);
2283
2284 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2285 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2286 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2287 + IS_SET (C));
2288 }
2289
2290 /* 64 bit add with carry */
2291 static void
2292 adc64 (sim_cpu *cpu)
2293 {
2294 unsigned rm = INSTR (20, 16);
2295 unsigned rn = INSTR (9, 5);
2296 unsigned rd = INSTR (4, 0);
2297
2298 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2299 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2300 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2301 + IS_SET (C));
2302 }
2303
2304 /* 32 bit add with carry setting flags. */
2305 static void
2306 adcs32 (sim_cpu *cpu)
2307 {
2308 unsigned rm = INSTR (20, 16);
2309 unsigned rn = INSTR (9, 5);
2310 unsigned rd = INSTR (4, 0);
2311
2312 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2313 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2314 uint32_t carry = IS_SET (C);
2315
2316 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2317 set_flags_for_add32 (cpu, value1, value2 + carry);
2318 }
2319
2320 /* 64 bit add with carry setting flags. */
2321 static void
2322 adcs64 (sim_cpu *cpu)
2323 {
2324 unsigned rm = INSTR (20, 16);
2325 unsigned rn = INSTR (9, 5);
2326 unsigned rd = INSTR (4, 0);
2327
2328 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2329 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2330 uint64_t carry = IS_SET (C);
2331
2332 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2333 set_flags_for_add64 (cpu, value1, value2 + carry);
2334 }
2335
2336 /* 32 bit sub with carry. */
2337 static void
2338 sbc32 (sim_cpu *cpu)
2339 {
2340 unsigned rm = INSTR (20, 16);
2341 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2342 unsigned rd = INSTR (4, 0);
2343
2344 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2345 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2346 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2347 - 1 + IS_SET (C));
2348 }
2349
2350 /* 64 bit sub with carry */
2351 static void
2352 sbc64 (sim_cpu *cpu)
2353 {
2354 unsigned rm = INSTR (20, 16);
2355 unsigned rn = INSTR (9, 5);
2356 unsigned rd = INSTR (4, 0);
2357
2358 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2359 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2360 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2361 - 1 + IS_SET (C));
2362 }
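
/* N.B. the carry flag acts as NOT(borrow) for subtraction: SBC
   computes Rn - Rm - (1 - C).  E.g. 5 - 3 yields 2 when C is set
   (a plain subtract) but 1 when C is clear.  */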
2363
2364 /* 32 bit sub with carry setting flags */
2365 static void
2366 sbcs32 (sim_cpu *cpu)
2367 {
2368 unsigned rm = INSTR (20, 16);
2369 unsigned rn = INSTR (9, 5);
2370 unsigned rd = INSTR (4, 0);
2371
2372 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2373 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2374 uint32_t carry = IS_SET (C);
2375 uint32_t result = value1 - value2 - 1 + carry;
2376
2377 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2378 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2379 }
2380
2381 /* 64 bit sub with carry setting flags */
2382 static void
2383 sbcs64 (sim_cpu *cpu)
2384 {
2385 unsigned rm = INSTR (20, 16);
2386 unsigned rn = INSTR (9, 5);
2387 unsigned rd = INSTR (4, 0);
2388
2389 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2390 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2391 uint64_t carry = IS_SET (C);
2392 uint64_t result = value1 - value2 - 1 + carry;
2393
2394 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2395 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2396 }
2397
2398 static void
2399 dexAddSubtractWithCarry (sim_cpu *cpu)
2400 {
2401 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2402 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2403 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2404 instr[28,21] = 1 1010 000
2405 instr[20,16] = Rm
2406 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2407 instr[9,5] = Rn
2408 instr[4,0] = Rd */
2409
2410 uint32_t op2 = INSTR (15, 10);
2411
2412 NYI_assert (28, 21, 0xD0);
2413
2414 if (op2 != 0)
2415 HALT_UNALLOC;
2416
2417 /* Dispatch on size:op:set?. */
2418 switch (INSTR (31, 29))
2419 {
2420 case 0: adc32 (cpu); break;
2421 case 1: adcs32 (cpu); break;
2422 case 2: sbc32 (cpu); break;
2423 case 3: sbcs32 (cpu); break;
2424 case 4: adc64 (cpu); break;
2425 case 5: adcs64 (cpu); break;
2426 case 6: sbc64 (cpu); break;
2427 case 7: sbcs64 (cpu); break;
2428 }
2429 }
2430
2431 static uint32_t
2432 testConditionCode (sim_cpu *cpu, CondCode cc)
2433 {
2434 /* This should be reducible to branchless logic
2435 by some careful testing of bits in CC followed
2436 by the requisite masking and combining of bits
2437 from the flag register.
2438
2439 For now we do it with a switch. */
2440 int res;
2441
2442 switch (cc)
2443 {
2444 case EQ: res = IS_SET (Z); break;
2445 case NE: res = IS_CLEAR (Z); break;
2446 case CS: res = IS_SET (C); break;
2447 case CC: res = IS_CLEAR (C); break;
2448 case MI: res = IS_SET (N); break;
2449 case PL: res = IS_CLEAR (N); break;
2450 case VS: res = IS_SET (V); break;
2451 case VC: res = IS_CLEAR (V); break;
2452 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2453 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2454 case GE: res = IS_SET (N) == IS_SET (V); break;
2455 case LT: res = IS_SET (N) != IS_SET (V); break;
2456 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2457 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2458 case AL:
2459 case NV:
2460 default:
2461 res = 1;
2462 break;
2463 }
2464 return res;
2465 }
2466
2467 static void
2468 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2469 {
2470 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2471 instr[30] = compare with positive (1) or negative value (0)
2472 instr[29,21] = 1 1101 0010
2473 instr[20,16] = Rm or const
2474 instr[15,12] = cond
2475 instr[11] = compare reg (0) or const (1)
2476 instr[10] = 0
2477 instr[9,5] = Rn
2478 instr[4] = 0
2479 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2480 signed int negate;
2481 unsigned rm;
2482 unsigned rn;
2483
2484 NYI_assert (29, 21, 0x1d2);
2485 NYI_assert (10, 10, 0);
2486 NYI_assert (4, 4, 0);
2487
2488 if (! testConditionCode (cpu, INSTR (15, 12)))
2489 {
2490 aarch64_set_CPSR (cpu, INSTR (3, 0));
2491 return;
2492 }
2493
2494 negate = INSTR (30, 30) ? 1 : -1;
2495 rm = INSTR (20, 16);
2496 rn = INSTR ( 9, 5);
2497
2498 if (INSTR (31, 31))
2499 {
2500 if (INSTR (11, 11))
2501 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2502 negate * (uint64_t) rm);
2503 else
2504 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2505 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2506 }
2507 else
2508 {
2509 if (INSTR (11, 11))
2510 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2511 negate * rm);
2512 else
2513 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2514 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2515 }
2516 }
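
/* E.g. CCMP X1, #5, #0xB, NE: when NE holds the flags are set from
   X1 - 5 (instr[11] selects the constant form of the second operand);
   when NE fails NZCV is loaded directly with the immediate 0b1011.  */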
2517
2518 static void
2519 do_vec_MOV_whole_vector (sim_cpu *cpu)
2520 {
2521 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2522
2523 instr[31] = 0
2524 instr[30] = half(0)/full(1)
2525 instr[29,21] = 001110101
2526 instr[20,16] = Vs
2527 instr[15,10] = 000111
2528 instr[9,5] = Vs
2529 instr[4,0] = Vd */
2530
2531 unsigned vs = INSTR (9, 5);
2532 unsigned vd = INSTR (4, 0);
2533
2534 NYI_assert (29, 21, 0x075);
2535 NYI_assert (15, 10, 0x07);
2536
2537 if (INSTR (20, 16) != vs)
2538 HALT_NYI;
2539
2540 if (INSTR (30, 30))
2541 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2542
2543 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2544 }
2545
2546 static void
2547 do_vec_MOV_into_scalar (sim_cpu *cpu)
2548 {
2549 /* instr[31] = 0
2550 instr[30] = word(0)/long(1)
2551 instr[29,21] = 00 1110 000
2552 instr[20,18] = element size and index
2553 instr[17,10] = 00 0011 11
2554 instr[9,5] = V source
2555 instr[4,0] = R dest */
2556
2557 unsigned vs = INSTR (9, 5);
2558 unsigned rd = INSTR (4, 0);
2559
2560 NYI_assert (29, 21, 0x070);
2561 NYI_assert (17, 10, 0x0F);
2562
2563 switch (INSTR (20, 18))
2564 {
2565 case 0x2:
2566 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2567 break;
2568
2569 case 0x6:
2570 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2571 break;
2572
2573 case 0x1:
2574 case 0x3:
2575 case 0x5:
2576 case 0x7:
2577 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2578 (cpu, vs, INSTR (20, 19)));
2579 break;
2580
2581 default:
2582 HALT_NYI;
2583 }
2584 }
2585
2586 static void
2587 do_vec_INS (sim_cpu *cpu)
2588 {
2589 /* instr[31,21] = 01001110000
2590 instr[20,16] = element size and index
2591 instr[15,10] = 000111
2592 instr[9,5] = W source
2593 instr[4,0] = V dest */
2594
2595 int index;
2596 unsigned rs = INSTR (9, 5);
2597 unsigned vd = INSTR (4, 0);
2598
2599 NYI_assert (31, 21, 0x270);
2600 NYI_assert (15, 10, 0x07);
2601
2602 if (INSTR (16, 16))
2603 {
2604 index = INSTR (20, 17);
2605 aarch64_set_vec_u8 (cpu, vd, index,
2606 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2607 }
2608 else if (INSTR (17, 17))
2609 {
2610 index = INSTR (20, 18);
2611 aarch64_set_vec_u16 (cpu, vd, index,
2612 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2613 }
2614 else if (INSTR (18, 18))
2615 {
2616 index = INSTR (20, 19);
2617 aarch64_set_vec_u32 (cpu, vd, index,
2618 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2619 }
2620 else if (INSTR (19, 19))
2621 {
2622 index = INSTR (20, 20);
2623 aarch64_set_vec_u64 (cpu, vd, index,
2624 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2625 }
2626 else
2627 HALT_NYI;
2628 }
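
/* E.g. INS Vd.S[3], Wn encodes instr[20,16] as 11100: bit 18 flags
   a 32-bit element and instr[20,19] = 0b11 selects element 3.  */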
2629
2630 static void
2631 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2632 {
2633 /* instr[31] = 0
2634 instr[30] = half(0)/full(1)
2635 instr[29,21] = 00 1110 000
2636 instr[20,16] = element size and index
2637 instr[15,10] = 0000 01
2638 instr[9,5] = V source
2639 instr[4,0] = V dest. */
2640
2641 unsigned full = INSTR (30, 30);
2642 unsigned vs = INSTR (9, 5);
2643 unsigned vd = INSTR (4, 0);
2644 int i, index;
2645
2646 NYI_assert (29, 21, 0x070);
2647 NYI_assert (15, 10, 0x01);
2648
2649 if (INSTR (16, 16))
2650 {
2651 index = INSTR (20, 17);
2652
2653 for (i = 0; i < (full ? 16 : 8); i++)
2654 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2655 }
2656 else if (INSTR (17, 17))
2657 {
2658 index = INSTR (20, 18);
2659
2660 for (i = 0; i < (full ? 8 : 4); i++)
2661 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2662 }
2663 else if (INSTR (18, 18))
2664 {
2665 index = INSTR (20, 19);
2666
2667 for (i = 0; i < (full ? 4 : 2); i++)
2668 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2669 }
2670 else
2671 {
2672 if (INSTR (19, 19) == 0)
2673 HALT_UNALLOC;
2674
2675 if (! full)
2676 HALT_UNALLOC;
2677
2678 index = INSTR (20, 20);
2679
2680 for (i = 0; i < 2; i++)
2681 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2682 }
2683 }
2684
2685 static void
2686 do_vec_TBL (sim_cpu *cpu)
2687 {
2688 /* instr[31] = 0
2689 instr[30] = half(0)/full(1)
2690 instr[29,21] = 00 1110 000
2691 instr[20,16] = Vm
2692 instr[15] = 0
2693 instr[14,13] = vec length
2694 instr[12,10] = 000
2695 instr[9,5] = V start
2696 instr[4,0] = V dest */
2697
2698 int full = INSTR (30, 30);
2699 int len = INSTR (14, 13) + 1;
2700 unsigned vm = INSTR (20, 16);
2701 unsigned vn = INSTR (9, 5);
2702 unsigned vd = INSTR (4, 0);
2703 unsigned i;
2704
2705 NYI_assert (29, 21, 0x070);
2706 NYI_assert (12, 10, 0);
2707
2708 for (i = 0; i < (full ? 16 : 8); i++)
2709 {
2710 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2711 uint8_t val;
2712
2713 if (selector < 16)
2714 val = aarch64_get_vec_u8 (cpu, vn, selector);
2715 else if (selector < 32)
2716 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2717 else if (selector < 48)
2718 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2719 else if (selector < 64)
2720 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2721 else
2722 val = 0;
2723
2724 aarch64_set_vec_u8 (cpu, vd, i, val);
2725 }
2726 }
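
/* E.g. TBL Vd.8B, {Vn.16B}, Vm.8B has len = 1, so any selector byte
   in Vm that is >= 16 yields a zero result byte; with len = 2
   selectors 16-31 index the table continuation in register vn + 1,
   and so on up to len = 4.  */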
2727
2728 static void
2729 do_vec_TRN (sim_cpu *cpu)
2730 {
2731 /* instr[31] = 0
2732 instr[30] = half(0)/full(1)
2733 instr[29,24] = 00 1110
2734 instr[23,22] = size
2735 instr[21] = 0
2736 instr[20,16] = Vm
2737 instr[15] = 0
2738 instr[14] = TRN1 (0) / TRN2 (1)
2739 instr[13,10] = 1010
2740 instr[9,5] = V source
2741 instr[4,0] = V dest. */
2742
2743 int full = INSTR (30, 30);
2744 int second = INSTR (14, 14);
2745 unsigned vm = INSTR (20, 16);
2746 unsigned vn = INSTR (9, 5);
2747 unsigned vd = INSTR (4, 0);
2748 unsigned i;
2749
2750 NYI_assert (29, 24, 0x0E);
2751 NYI_assert (13, 10, 0xA);
2752
2753 switch (INSTR (23, 22))
2754 {
2755 case 0:
2756 for (i = 0; i < (full ? 8 : 4); i++)
2757 {
2758 aarch64_set_vec_u8
2759 (cpu, vd, i * 2,
2760 aarch64_get_vec_u8 (cpu, vn, i * 2 + second));
2761 aarch64_set_vec_u8
2762 (cpu, vd, i * 2 + 1,
2763 aarch64_get_vec_u8 (cpu, vm, i * 2 + second));
2764 }
2765 break;
2766
2767 case 1:
2768 for (i = 0; i < (full ? 4 : 2); i++)
2769 {
2770 aarch64_set_vec_u16
2771 (cpu, vd, i * 2,
2772 aarch64_get_vec_u16 (cpu, vn, i * 2 + second));
2773 aarch64_set_vec_u16
2774 (cpu, vd, i * 2 + 1,
2775 aarch64_get_vec_u16 (cpu, vm, i * 2 + second));
2776 }
2777 break;
2778
2779 case 2:
2780 aarch64_set_vec_u32
2781 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, vn, 0 + second));
2782 aarch64_set_vec_u32
2783 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, vm, 0 + second));
2784 aarch64_set_vec_u32
2785 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, vn, 2 + second));
2786 aarch64_set_vec_u32
2787 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, vm, 2 + second));
2788 break;
2789
2790 case 3:
2791 if (! full)
2792 HALT_UNALLOC;
2793
2794 aarch64_set_vec_u64 (cpu, vd, 0,
2795 aarch64_get_vec_u64 (cpu, vn, second));
2796 aarch64_set_vec_u64 (cpu, vd, 1,
2797 aarch64_get_vec_u64 (cpu, vm, second));
2798 break;
2799 }
2800 }
2801
2802 static void
2803 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2804 {
2805 /* instr[31] = 0
2806 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2807 [must be 1 for 64-bit xfer]
2808 instr[29,20] = 00 1110 0000
2809 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2810 0100=> 32-bits, 1000=> 64-bits
2811 instr[15,10] = 0000 11
2812 instr[9,5] = W source
2813 instr[4,0] = V dest. */
2814
2815 unsigned i;
2816 unsigned Vd = INSTR (4, 0);
2817 unsigned Rs = INSTR (9, 5);
2818 int both = INSTR (30, 30);
2819
2820 NYI_assert (29, 20, 0x0E0);
2821 NYI_assert (15, 10, 0x03);
2822
2823 switch (INSTR (19, 16))
2824 {
2825 case 1:
2826 for (i = 0; i < (both ? 16 : 8); i++)
2827 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2828 break;
2829
2830 case 2:
2831 for (i = 0; i < (both ? 8 : 4); i++)
2832 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2833 break;
2834
2835 case 4:
2836 for (i = 0; i < (both ? 4 : 2); i++)
2837 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2838 break;
2839
2840 case 8:
2841 if (!both)
2842 HALT_NYI;
2843 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2844 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2845 break;
2846
2847 default:
2848 HALT_NYI;
2849 }
2850 }
2851
2852 static void
2853 do_vec_UZP (sim_cpu *cpu)
2854 {
2855 /* instr[31] = 0
2856 instr[30] = half(0)/full(1)
2857 instr[29,24] = 00 1110
2858 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2859 instr[21] = 0
2860 instr[20,16] = Vm
2861 instr[15] = 0
2862 instr[14] = lower (0) / upper (1)
2863 instr[13,10] = 0110
2864 instr[9,5] = Vn
2865 instr[4,0] = Vd. */
2866
2867 int full = INSTR (30, 30);
2868 int upper = INSTR (14, 14);
2869
2870 unsigned vm = INSTR (20, 16);
2871 unsigned vn = INSTR (9, 5);
2872 unsigned vd = INSTR (4, 0);
2873
2874 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2875 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2876 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2877 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2878
2879 uint64_t val1 = 0;
2880 uint64_t val2 = 0;
2881
2882 uint64_t input2 = full ? val_n2 : val_m1;
2883 unsigned i;
2884
2885 NYI_assert (29, 24, 0x0E);
2886 NYI_assert (21, 21, 0);
2887 NYI_assert (15, 15, 0);
2888 NYI_assert (13, 10, 6);
2889 switch (INSTR (23, 22))
2890 {
2891 case 0:
2892 for (i = 0; i < 4; i++)
2893 {
2894 val1 |= (((val_n1 >> (i * 16 + upper * 8)) & 0xFF) << (i * 8)) | (((input2 >> (i * 16 + upper * 8)) & 0xFF) << (i * 8 + 32));
2895 val2 |= (((val_m1 >> (i * 16 + upper * 8)) & 0xFF) << (i * 8)) | (((val_m2 >> (i * 16 + upper * 8)) & 0xFF) << (i * 8 + 32));
2896 }
2897 break;
2898
2899 case 1:
2900 for (i = 0; i < 2; i++)
2901 {
2902 val1 |= (((val_n1 >> (i * 32 + upper * 16)) & 0xFFFF) << (i * 16)) | (((input2 >> (i * 32 + upper * 16)) & 0xFFFF) << (i * 16 + 32));
2903 val2 |= (((val_m1 >> (i * 32 + upper * 16)) & 0xFFFF) << (i * 16)) | (((val_m2 >> (i * 32 + upper * 16)) & 0xFFFF) << (i * 16 + 32));
2904 }
2905 break;
2906
2907 case 2:
2908 val1 = ((val_n1 >> (upper * 32)) & 0xFFFFFFFF) | (((input2 >> (upper * 32)) & 0xFFFFFFFF) << 32);
2909 val2 = ((val_m1 >> (upper * 32)) & 0xFFFFFFFF) | (((val_m2 >> (upper * 32)) & 0xFFFFFFFF) << 32);
2910 break;
2911
2912 case 3:
2913 if (! full)
2914 HALT_UNALLOC;
2915 val1 = upper ? val_n2 : val_n1;
2916 val2 = upper ? val_m2 : val_m1;
2917 }
2918
2919 aarch64_set_vec_u64 (cpu, vd, 0, val1);
2920 if (full)
2921 aarch64_set_vec_u64 (cpu, vd, 1, val2);
2922 }
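
/* E.g. UZP1 Vd.8B, Vn.8B, Vm.8B packs the even-numbered bytes of Vn
   into the low half of Vd and the even-numbered bytes of Vm into the
   high half; UZP2 (upper = 1) packs the odd-numbered bytes.  */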
2923
2924 static void
2925 do_vec_ZIP (sim_cpu *cpu)
2926 {
2927 /* instr[31] = 0
2928 instr[30] = half(0)/full(1)
2929 instr[29,24] = 00 1110
2930 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2931 instr[21] = 0
2932 instr[20,16] = Vm
2933 instr[15] = 0
2934 instr[14] = lower (0) / upper (1)
2935 instr[13,10] = 1110
2936 instr[9,5] = Vn
2937 instr[4,0] = Vd. */
2938
2939 int full = INSTR (30, 30);
2940 int upper = INSTR (14, 14);
2941
2942 unsigned vm = INSTR (20, 16);
2943 unsigned vn = INSTR (9, 5);
2944 unsigned vd = INSTR (4, 0);
2945
2946 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2947 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2948 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2949 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2950
2951 uint64_t val1 = 0;
2952 uint64_t val2 = 0;
2953
2954 uint64_t input1 = upper ? (full ? val_n2 : (val_n1 >> 32)) : val_n1;
2955 uint64_t input2 = upper ? (full ? val_m2 : (val_m1 >> 32)) : val_m1;
2956
2957 NYI_assert (29, 24, 0x0E);
2958 NYI_assert (21, 21, 0);
2959 NYI_assert (15, 15, 0);
2960 NYI_assert (13, 10, 0xE);
2961
2962 switch (INSTR (23, 22))
2963 {
2964 case 0:
2965 val1 =
2966 ((input1 << 0) & (0xFF << 0))
2967 | ((input2 << 8) & (0xFF << 8))
2968 | ((input1 << 8) & (0xFF << 16))
2969 | ((input2 << 16) & (0xFF << 24))
2970 | ((input1 << 16) & (0xFFULL << 32))
2971 | ((input2 << 24) & (0xFFULL << 40))
2972 | ((input1 << 24) & (0xFFULL << 48))
2973 | ((input2 << 32) & (0xFFULL << 56));
2974
2975 val2 =
2976 ((input1 >> 32) & (0xFF << 0))
2977 | ((input2 >> 24) & (0xFF << 8))
2978 | ((input1 >> 24) & (0xFF << 16))
2979 | ((input2 >> 16) & (0xFF << 24))
2980 | ((input1 >> 16) & (0xFFULL << 32))
2981 | ((input2 >> 8) & (0xFFULL << 40))
2982 | ((input1 >> 8) & (0xFFULL << 48))
2983 | ((input2 >> 0) & (0xFFULL << 56));
2984 break;
2985
2986 case 1:
2987 val1 =
2988 ((input1 << 0) & (0xFFFF << 0))
2989 | ((input2 << 16) & (0xFFFF << 16))
2990 | ((input1 << 16) & (0xFFFFULL << 32))
2991 | ((input2 << 32) & (0xFFFFULL << 48));
2992
2993 val2 =
2994 ((input1 >> 32) & (0xFFFF << 0))
2995 | ((input2 >> 16) & (0xFFFF << 16))
2996 | ((input1 >> 16) & (0xFFFFULL << 32))
2997 | ((input2 >> 0) & (0xFFFFULL << 48));
2998 break;
2999
3000 case 2:
3001 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3002 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3003 break;
3004
3005 case 3:
3006 val1 = input1;
3007 val2 = input2;
3008 break;
3009 }
3010
3011 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3012 if (full)
3013 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3014 }
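
/* E.g. ZIP1 Vd.2S, Vn.2S, Vm.2S (case 2 with full = 0) produces
   Vn.S[0] in element 0 and Vm.S[0] in element 1; ZIP2 interleaves
   element 1 of each source instead.  */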
3015
3016 /* Floating point immediates are encoded in 8 bits.
3017 fpimm[7] = sign bit.
3018 fpimm[6:4] = signed exponent.
3019 fpimm[3:0] = fraction (assuming leading 1).
3020 i.e. F = s * 1.f * 2^(e - b). */
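
/* For example imm8 = 0x70 (s = 0, e = 7, f = 0) decodes to 1.0,
   and imm8 = 0xA0 (s = 1, e = 2, f = 0) decodes to -8.0.  */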
3021
3022 static float
3023 fp_immediate_for_encoding_32 (uint32_t imm8)
3024 {
3025 float u;
3026 uint32_t s, e, f, i;
3027
3028 s = (imm8 >> 7) & 0x1;
3029 e = (imm8 >> 4) & 0x7;
3030 f = imm8 & 0xf;
3031
3032 /* The fp value is s * n/16 * 2r where n is 16+e. */
3033 u = (16.0 + f) / 16.0;
3034
3035 /* N.B. exponent is signed. */
3036 if (e < 4)
3037 {
3038 int epos = e;
3039
3040 for (i = 0; i <= epos; i++)
3041 u *= 2.0;
3042 }
3043 else
3044 {
3045 int eneg = 7 - e;
3046
3047 for (i = 0; i < eneg; i++)
3048 u /= 2.0;
3049 }
3050
3051 if (s)
3052 u = - u;
3053
3054 return u;
3055 }
3056
3057 static double
3058 fp_immediate_for_encoding_64 (uint32_t imm8)
3059 {
3060 double u;
3061 uint32_t s, e, f, i;
3062
3063 s = (imm8 >> 7) & 0x1;
3064 e = (imm8 >> 4) & 0x7;
3065 f = imm8 & 0xf;
3066
3067 /* The fp value is s * n/16 * 2r where n is 16+e. */
3068 u = (16.0 + f) / 16.0;
3069
3070 /* N.B. exponent is signed. */
3071 if (e < 4)
3072 {
3073 int epos = e;
3074
3075 for (i = 0; i <= epos; i++)
3076 u *= 2.0;
3077 }
3078 else
3079 {
3080 int eneg = 7 - e;
3081
3082 for (i = 0; i < eneg; i++)
3083 u /= 2.0;
3084 }
3085
3086 if (s)
3087 u = - u;
3088
3089 return u;
3090 }
3091
3092 static void
3093 do_vec_MOV_immediate (sim_cpu *cpu)
3094 {
3095 /* instr[31] = 0
3096 instr[30] = full/half selector
3097 instr[29,19] = 00111100000
3098 instr[18,16] = high 3 bits of uimm8
3099 instr[15,12] = size & shift:
3100 0000 => 32-bit
3101 0010 => 32-bit + LSL#8
3102 0100 => 32-bit + LSL#16
3103 0110 => 32-bit + LSL#24
3104 1010 => 16-bit + LSL#8
3105 1000 => 16-bit
3106 1101 => 32-bit + MSL#16
3107 1100 => 32-bit + MSL#8
3108 1110 => 8-bit
3109 1111 => double
3110 instr[11,10] = 01
3111 instr[9,5] = low 5-bits of uimm8
3112 instr[4,0] = Vd. */
3113
3114 int full = INSTR (30, 30);
3115 unsigned vd = INSTR (4, 0);
3116 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3117 unsigned i;
3118
3119 NYI_assert (29, 19, 0x1E0);
3120 NYI_assert (11, 10, 1);
3121
3122 switch (INSTR (15, 12))
3123 {
3124 case 0x0: /* 32-bit, no shift. */
3125 case 0x2: /* 32-bit, shift by 8. */
3126 case 0x4: /* 32-bit, shift by 16. */
3127 case 0x6: /* 32-bit, shift by 24. */
3128 val <<= (8 * INSTR (14, 13));
3129 for (i = 0; i < (full ? 4 : 2); i++)
3130 aarch64_set_vec_u32 (cpu, vd, i, val);
3131 break;
3132
3133 case 0xa: /* 16-bit, shift by 8. */
3134 val <<= 8;
3135 /* Fall through. */
3136 case 0x8: /* 16-bit, no shift. */
3137 for (i = 0; i < (full ? 8 : 4); i++)
3138 aarch64_set_vec_u16 (cpu, vd, i, val);
3139 break;
3140 case 0xd: /* 32-bit, mask shift by 16. */
3141 val <<= 8;
3142 val |= 0xFF;
3143 /* Fall through. */
3144 case 0xc: /* 32-bit, mask shift by 8. */
3145 val <<= 8;
3146 val |= 0xFF;
3147 for (i = 0; i < (full ? 4 : 2); i++)
3148 aarch64_set_vec_u32 (cpu, vd, i, val);
3149 break;
3150
3151 case 0xe: /* 8-bit, no shift. */
3152 for (i = 0; i < (full ? 16 : 8); i++)
3153 aarch64_set_vec_u8 (cpu, vd, i, val);
3154 break;
3155
3156 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3157 {
3158 float u = fp_immediate_for_encoding_32 (val);
3159 for (i = 0; i < (full ? 4 : 2); i++)
3160 aarch64_set_vec_float (cpu, vd, i, u);
3161 break;
3162 }
3163
3164 default:
3165 HALT_NYI;
3166 }
3167 }
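
/* E.g. the MSL ("shifting ones") forms: cmode 0xc with uimm8 = 0xAB
   yields the 32-bit pattern 0x0000ABFF, while cmode 0xd yields
   0x00ABFFFF.  */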
3168
3169 static void
3170 do_vec_MVNI (sim_cpu *cpu)
3171 {
3172 /* instr[31] = 0
3173 instr[30] = full/half selector
3174 instr[29,19] = 10111100000
3175 instr[18,16] = high 3 bits of uimm8
3176 instr[15,12] = selector
3177 instr[11,10] = 01
3178 instr[9,5] = low 5-bits of uimm8
3179 instr[4,0] = Vd. */
3180
3181 int full = INSTR (30, 30);
3182 unsigned vd = INSTR (4, 0);
3183 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3184 unsigned i;
3185
3186 NYI_assert (29, 19, 0x5E0);
3187 NYI_assert (11, 10, 1);
3188
3189 switch (INSTR (15, 12))
3190 {
3191 case 0x0: /* 32-bit, no shift. */
3192 case 0x2: /* 32-bit, shift by 8. */
3193 case 0x4: /* 32-bit, shift by 16. */
3194 case 0x6: /* 32-bit, shift by 24. */
3195 val <<= (8 * INSTR (14, 13));
3196 val = ~ val;
3197 for (i = 0; i < (full ? 4 : 2); i++)
3198 aarch64_set_vec_u32 (cpu, vd, i, val);
3199 return;
3200
3201 case 0xa: /* 16-bit, 8 bit shift. */
3202 val <<= 8;
3203 case 0x8: /* 16-bit, no shift. */
3204 val = ~ val;
3205 for (i = 0; i < (full ? 8 : 4); i++)
3206 aarch64_set_vec_u16 (cpu, vd, i, val);
3207 return;
3208
3209 case 0xd: /* 32-bit, mask shift by 16. */
3210 val <<= 8;
3211 val |= 0xFF;
3212 case 0xc: /* 32-bit, mask shift by 8. */
3213 val <<= 8;
3214 val |= 0xFF;
3215 val = ~ val;
3216 for (i = 0; i < (full ? 4 : 2); i++)
3217 aarch64_set_vec_u32 (cpu, vd, i, val);
3218 return;
3219
3220 case 0xE: /* MOVI Dn, #mask64 */
3221 {
3222 uint64_t mask = 0;
3223
3224 for (i = 0; i < 8; i++)
3225 if (val & (1 << i))
3226 mask |= (0xFFULL << (i * 8));
3227 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3228 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3229 return;
3230 }
3231
3232 case 0xf: /* FMOV Vd.2D, #fpimm. */
3233 {
3234 double u = fp_immediate_for_encoding_64 (val);
3235
3236 if (! full)
3237 HALT_UNALLOC;
3238
3239 aarch64_set_vec_double (cpu, vd, 0, u);
3240 aarch64_set_vec_double (cpu, vd, 1, u);
3241 return;
3242 }
3243
3244 default:
3245 HALT_NYI;
3246 }
3247 }
3248
3249 #define ABS(A) ((A) < 0 ? - (A) : (A))
3250
3251 static void
3252 do_vec_ABS (sim_cpu *cpu)
3253 {
3254 /* instr[31] = 0
3255 instr[30] = half(0)/full(1)
3256 instr[29,24] = 00 1110
3257 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3258 instr[21,10] = 10 0000 1011 10
3259 instr[9,5] = Vn
3260 instr[4,0] = Vd. */
3261
3262 unsigned vn = INSTR (9, 5);
3263 unsigned vd = INSTR (4, 0);
3264 unsigned full = INSTR (30, 30);
3265 unsigned i;
3266
3267 NYI_assert (29, 24, 0x0E);
3268 NYI_assert (21, 10, 0x82E);
3269
3270 switch (INSTR (23, 22))
3271 {
3272 case 0:
3273 for (i = 0; i < (full ? 16 : 8); i++)
3274 aarch64_set_vec_s8 (cpu, vd, i,
3275 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3276 break;
3277
3278 case 1:
3279 for (i = 0; i < (full ? 8 : 4); i++)
3280 aarch64_set_vec_s16 (cpu, vd, i,
3281 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3282 break;
3283
3284 case 2:
3285 for (i = 0; i < (full ? 4 : 2); i++)
3286 aarch64_set_vec_s32 (cpu, vd, i,
3287 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3288 break;
3289
3290 case 3:
3291 if (! full)
3292 HALT_NYI;
3293 for (i = 0; i < 2; i++)
3294 aarch64_set_vec_s64 (cpu, vd, i,
3295 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3296 break;
3297 }
3298 }
3299
3300 static void
3301 do_vec_ADDV (sim_cpu *cpu)
3302 {
3303 /* instr[31] = 0
3304 instr[30] = full/half selector
3305 instr[29,24] = 00 1110
3306 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3307 instr[21,10] = 11 0001 1011 10
3308 instr[9,5] = Vm
3309 instr[4,0] = Vd. */
3310
3311 unsigned vm = INSTR (9, 5);
3312 unsigned rd = INSTR (4, 0);
3313 unsigned i;
3314 uint64_t val = 0;
3315 int full = INSTR (30, 30);
3316
3317 NYI_assert (29, 24, 0x0E);
3318 NYI_assert (21, 10, 0xC6E);
3319
3320 switch (INSTR (23, 22))
3321 {
3322 case 0:
3323 for (i = 0; i < (full ? 16 : 8); i++)
3324 val += aarch64_get_vec_u8 (cpu, vm, i);
3325 aarch64_set_vec_u64 (cpu, rd, 0, val);
3326 return;
3327
3328 case 1:
3329 for (i = 0; i < (full ? 8 : 4); i++)
3330 val += aarch64_get_vec_u16 (cpu, vm, i);
3331 aarch64_set_vec_u64 (cpu, rd, 0, val);
3332 return;
3333
3334 case 2:
3335 for (i = 0; i < (full ? 4 : 2); i++)
3336 val += aarch64_get_vec_u32 (cpu, vm, i);
3337 aarch64_set_vec_u64 (cpu, rd, 0, val);
3338 return;
3339
3340 case 3:
3341 if (! full)
3342 HALT_UNALLOC;
3343 val = aarch64_get_vec_u64 (cpu, vm, 0);
3344 val += aarch64_get_vec_u64 (cpu, vm, 1);
3345 aarch64_set_vec_u64 (cpu, rd, 0, val);
3346 return;
3347 }
3348 }
3349
3350 static void
3351 do_vec_ins_2 (sim_cpu *cpu)
3352 {
3353 /* instr[31,21] = 01001110000
3354 instr[20,18] = size & element selector
3355 instr[17,14] = 0000
3356 instr[13] = direction: to vec(0), from vec (1)
3357 instr[12,10] = 111
3358 instr[9,5] = Vm
3359 instr[4,0] = Vd. */
3360
3361 unsigned elem;
3362 unsigned vm = INSTR (9, 5);
3363 unsigned vd = INSTR (4, 0);
3364
3365 NYI_assert (31, 21, 0x270);
3366 NYI_assert (17, 14, 0);
3367 NYI_assert (12, 10, 7);
3368
3369 if (INSTR (13, 13) == 1)
3370 {
3371 if (INSTR (18, 18) == 1)
3372 {
3373 /* 32-bit moves. */
3374 elem = INSTR (20, 19);
3375 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3376 aarch64_get_vec_u32 (cpu, vm, elem));
3377 }
3378 else
3379 {
3380 /* 64-bit moves. */
3381 if (INSTR (19, 19) != 1)
3382 HALT_NYI;
3383
3384 elem = INSTR (20, 20);
3385 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3386 aarch64_get_vec_u64 (cpu, vm, elem));
3387 }
3388 }
3389 else
3390 {
3391 if (INSTR (18, 18) == 1)
3392 {
3393 /* 32-bit moves. */
3394 elem = INSTR (20, 19);
3395 aarch64_set_vec_u32 (cpu, vd, elem,
3396 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3397 }
3398 else
3399 {
3400 /* 64-bit moves. */
3401 if (INSTR (19, 19) != 1)
3402 HALT_NYI;
3403
3404 elem = INSTR (20, 20);
3405 aarch64_set_vec_u64 (cpu, vd, elem,
3406 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3407 }
3408 }
3409 }
3410
3411 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3412 do \
3413 { \
3414 DST_TYPE a[N], b[N]; \
3415 \
3416 for (i = 0; i < (N); i++) \
3417 { \
3418 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3419 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3420 } \
3421 for (i = 0; i < (N); i++) \
3422 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3423 } \
3424 while (0)
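
/* E.g. DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32) reads four 16-bit
   elements of Vn and Vm (starting at element 'bias') into 32-bit
   buffers and writes the four products to Vd.  Buffering the reads
   first keeps the result correct when Vd overlaps a source.  */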
3425
3426 static void
3427 do_vec_mull (sim_cpu *cpu)
3428 {
3429 /* instr[31] = 0
3430 instr[30] = lower(0)/upper(1) selector
3431 instr[29] = signed(0)/unsigned(1)
3432 instr[28,24] = 0 1110
3433 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3434 instr[21] = 1
3435 instr[20,16] = Vm
3436 instr[15,10] = 11 0000
3437 instr[9,5] = Vn
3438 instr[4,0] = Vd. */
3439
3440 int unsign = INSTR (29, 29);
3441 int bias = INSTR (30, 30);
3442 unsigned vm = INSTR (20, 16);
3443 unsigned vn = INSTR ( 9, 5);
3444 unsigned vd = INSTR ( 4, 0);
3445 unsigned i;
3446
3447 NYI_assert (28, 24, 0x0E);
3448 NYI_assert (15, 10, 0x30);
3449
3450 /* NB: Read source values before writing results, in case
3451 the source and destination vectors are the same. */
3452 switch (INSTR (23, 22))
3453 {
3454 case 0:
3455 if (bias)
3456 bias = 8;
3457 if (unsign)
3458 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3459 else
3460 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3461 return;
3462
3463 case 1:
3464 if (bias)
3465 bias = 4;
3466 if (unsign)
3467 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3468 else
3469 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3470 return;
3471
3472 case 2:
3473 if (bias)
3474 bias = 2;
3475 if (unsign)
3476 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3477 else
3478 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3479 return;
3480
3481 case 3:
3482 HALT_NYI;
3483 }
3484 }
3485
3486 static void
3487 do_vec_fadd (sim_cpu *cpu)
3488 {
3489 /* instr[31] = 0
3490 instr[30] = half(0)/full(1)
3491 instr[29,24] = 001110
3492 instr[23] = FADD(0)/FSUB(1)
3493 instr[22] = float (0)/double(1)
3494 instr[21] = 1
3495 instr[20,16] = Vm
3496 instr[15,10] = 110101
3497 instr[9,5] = Vn
3498 instr[4,0] = Vd. */
3499
3500 unsigned vm = INSTR (20, 16);
3501 unsigned vn = INSTR (9, 5);
3502 unsigned vd = INSTR (4, 0);
3503 unsigned i;
3504 int full = INSTR (30, 30);
3505
3506 NYI_assert (29, 24, 0x0E);
3507 NYI_assert (21, 21, 1);
3508 NYI_assert (15, 10, 0x35);
3509
3510 if (INSTR (23, 23))
3511 {
3512 if (INSTR (22, 22))
3513 {
3514 if (! full)
3515 HALT_NYI;
3516
3517 for (i = 0; i < 2; i++)
3518 aarch64_set_vec_double (cpu, vd, i,
3519 aarch64_get_vec_double (cpu, vn, i)
3520 - aarch64_get_vec_double (cpu, vm, i));
3521 }
3522 else
3523 {
3524 for (i = 0; i < (full ? 4 : 2); i++)
3525 aarch64_set_vec_float (cpu, vd, i,
3526 aarch64_get_vec_float (cpu, vn, i)
3527 - aarch64_get_vec_float (cpu, vm, i));
3528 }
3529 }
3530 else
3531 {
3532 if (INSTR (22, 22))
3533 {
3534 if (! full)
3535 HALT_NYI;
3536
3537 for (i = 0; i < 2; i++)
3538 aarch64_set_vec_double (cpu, vd, i,
3539 aarch64_get_vec_double (cpu, vm, i)
3540 + aarch64_get_vec_double (cpu, vn, i));
3541 }
3542 else
3543 {
3544 for (i = 0; i < (full ? 4 : 2); i++)
3545 aarch64_set_vec_float (cpu, vd, i,
3546 aarch64_get_vec_float (cpu, vm, i)
3547 + aarch64_get_vec_float (cpu, vn, i));
3548 }
3549 }
3550 }
3551
3552 static void
3553 do_vec_add (sim_cpu *cpu)
3554 {
3555 /* instr[31] = 0
3556 instr[30] = full/half selector
3557 instr[29,24] = 001110
3558 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3559 instr[21] = 1
3560 instr[20,16] = Vm
3561 instr[15,10] = 100001
3562 instr[9,5] = Vn
3563 instr[4,0] = Vd. */
3564
3565 unsigned vm = INSTR (20, 16);
3566 unsigned vn = INSTR (9, 5);
3567 unsigned vd = INSTR (4, 0);
3568 unsigned i;
3569 int full = INSTR (30, 30);
3570
3571 NYI_assert (29, 24, 0x0E);
3572 NYI_assert (21, 21, 1);
3573 NYI_assert (15, 10, 0x21);
3574
3575 switch (INSTR (23, 22))
3576 {
3577 case 0:
3578 for (i = 0; i < (full ? 16 : 8); i++)
3579 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3580 + aarch64_get_vec_u8 (cpu, vm, i));
3581 return;
3582
3583 case 1:
3584 for (i = 0; i < (full ? 8 : 4); i++)
3585 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3586 + aarch64_get_vec_u16 (cpu, vm, i));
3587 return;
3588
3589 case 2:
3590 for (i = 0; i < (full ? 4 : 2); i++)
3591 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3592 + aarch64_get_vec_u32 (cpu, vm, i));
3593 return;
3594
3595 case 3:
3596 if (! full)
3597 HALT_UNALLOC;
3598 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3599 + aarch64_get_vec_u64 (cpu, vm, 0));
3600 aarch64_set_vec_u64 (cpu, vd, 1,
3601 aarch64_get_vec_u64 (cpu, vn, 1)
3602 + aarch64_get_vec_u64 (cpu, vm, 1));
3603 return;
3604 }
3605 }
3606
3607 static void
3608 do_vec_mul (sim_cpu *cpu)
3609 {
3610 /* instr[31] = 0
3611 instr[30] = full/half selector
3612 instr[29,24] = 00 1110
3613 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3614 instr[21] = 1
3615 instr[20,16] = Vm
3616 instr[15,10] = 10 0111
3617 instr[9,5] = Vn
3618 instr[4,0] = Vd. */
3619
3620 unsigned vm = INSTR (20, 16);
3621 unsigned vn = INSTR (9, 5);
3622 unsigned vd = INSTR (4, 0);
3623 unsigned i;
3624 int full = INSTR (30, 30);
3625 int bias = 0;
3626
3627 NYI_assert (29, 24, 0x0E);
3628 NYI_assert (21, 21, 1);
3629 NYI_assert (15, 10, 0x27);
3630
3631 switch (INSTR (23, 22))
3632 {
3633 case 0:
3634 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3635 return;
3636
3637 case 1:
3638 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3639 return;
3640
3641 case 2:
3642 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3643 return;
3644
3645 case 3:
3646 HALT_UNALLOC;
3647 }
3648 }
3649
3650 static void
3651 do_vec_MLA (sim_cpu *cpu)
3652 {
3653 /* instr[31] = 0
3654 instr[30] = full/half selector
3655 instr[29,24] = 00 1110
3656 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3657 instr[21] = 1
3658 instr[20,16] = Vm
3659 instr[15,10] = 1001 01
3660 instr[9,5] = Vn
3661 instr[4,0] = Vd. */
3662
3663 unsigned vm = INSTR (20, 16);
3664 unsigned vn = INSTR (9, 5);
3665 unsigned vd = INSTR (4, 0);
3666 unsigned i;
3667 int full = INSTR (30, 30);
3668
3669 NYI_assert (29, 24, 0x0E);
3670 NYI_assert (21, 21, 1);
3671 NYI_assert (15, 10, 0x25);
3672
3673 switch (INSTR (23, 22))
3674 {
3675 case 0:
3676 {
3677 uint16_t a[16], b[16];
3678
3679 for (i = 0; i < (full ? 16 : 8); i++)
3680 {
3681 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3682 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3683 }
3684
3685 for (i = 0; i < (full ? 16 : 8); i++)
3686 {
3687 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3688
3689 aarch64_set_vec_u8 (cpu, vd, i, v + (a[i] * b[i]));
3690 }
3691 }
3692 return;
3693
3694 case 1:
3695 {
3696 uint32_t a[8], b[8];
3697
3698 for (i = 0; i < (full ? 8 : 4); i++)
3699 {
3700 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3701 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3702 }
3703
3704 for (i = 0; i < (full ? 8 : 4); i++)
3705 {
3706 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3707
3708 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3709 }
3710 }
3711 return;
3712
3713 case 2:
3714 {
3715 uint64_t a[4], b[4];
3716
3717 for (i = 0; i < (full ? 4 : 2); i++)
3718 {
3719 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3720 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3721 }
3722
3723 for (i = 0; i < (full ? 4 : 2); i++)
3724 {
3725 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3726
3727 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3728 }
3729 }
3730 return;
3731
3732 case 3:
3733 HALT_UNALLOC;
3734 }
3735 }
3736
3737 static float
3738 fmaxnm (float a, float b)
3739 {
3740 if (! isnan (a))
3741 {
3742 if (! isnan (b))
3743 return a > b ? a : b;
3744 return a;
3745 }
3746 else if (! isnan (b))
3747 return b;
3748 return a;
3749 }
3750
3751 static float
3752 fminnm (float a, float b)
3753 {
3754 if (! isnan (a))
3755 {
3756 if (! isnan (b))
3757 return a < b ? a : b;
3758 return a;
3759 }
3760 else if (! isnan (b))
3761 return b;
3762 return a;
3763 }
3764
3765 static double
3766 dmaxnm (double a, double b)
3767 {
3768 if (! isnan (a))
3769 {
3770 if (! isnan (b))
3771 return a > b ? a : b;
3772 return a;
3773 }
3774 else if (! isnan (b))
3775 return b;
3776 return a;
3777 }
3778
3779 static double
3780 dminnm (double a, double b)
3781 {
3782 if (! isnan (a))
3783 {
3784 if (! isnan (b))
3785 return a < b ? a : b;
3786 return a;
3787 }
3788 else if (! isnan (b))
3789 return b;
3790 return a;
3791 }
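
/* E.g. fmaxnm (NaN, 3.0f) and fmaxnm (3.0f, NaN) both return 3.0f,
   reflecting the FMAXNM rule that a quiet NaN operand is ignored
   when the other operand is numeric; only fmaxnm (NaN, NaN) returns
   a NaN.  */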
3792
3793 static void
3794 do_vec_FminmaxNMP (sim_cpu *cpu)
3795 {
3796 /* instr [31] = 0
3797 instr [30] = half (0)/full (1)
3798 instr [29,24] = 10 1110
3799 instr [23] = max(0)/min(1)
3800 instr [22] = float (0)/double (1)
3801 instr [21] = 1
3802 instr [20,16] = Vm
3803 instr [15,10] = 1100 01
3804 instr [9,5] = Vn
3805 instr [4,0] = Vd. */
3806
3807 unsigned vm = INSTR (20, 16);
3808 unsigned vn = INSTR (9, 5);
3809 unsigned vd = INSTR (4, 0);
3810 int full = INSTR (30, 30);
3811
3812 NYI_assert (29, 24, 0x2E);
3813 NYI_assert (21, 21, 1);
3814 NYI_assert (15, 10, 0x31);
3815
3816 if (INSTR (22, 22))
3817 {
3818 double (* fn)(double, double) = INSTR (23, 23)
3819 ? dminnm : dmaxnm;
3820
3821 if (! full)
3822 HALT_NYI;
3823 aarch64_set_vec_double (cpu, vd, 0,
3824 fn (aarch64_get_vec_double (cpu, vn, 0),
3825 aarch64_get_vec_double (cpu, vn, 1)));
3826 aarch64_set_vec_double (cpu, vd, 1,
3827 fn (aarch64_get_vec_double (cpu, vm, 0),
3828 aarch64_get_vec_double (cpu, vm, 1)));
3829 }
3830 else
3831 {
3832 float (* fn)(float, float) = INSTR (23, 23)
3833 ? fminnm : fmaxnm;
3834
3835 aarch64_set_vec_float (cpu, vd, 0,
3836 fn (aarch64_get_vec_float (cpu, vn, 0),
3837 aarch64_get_vec_float (cpu, vn, 1)));
3838 if (full)
3839 aarch64_set_vec_float (cpu, vd, 1,
3840 fn (aarch64_get_vec_float (cpu, vn, 2),
3841 aarch64_get_vec_float (cpu, vn, 3)));
3842
3843 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3844 fn (aarch64_get_vec_float (cpu, vm, 0),
3845 aarch64_get_vec_float (cpu, vm, 1)));
3846 if (full)
3847 aarch64_set_vec_float (cpu, vd, 3,
3848 fn (aarch64_get_vec_float (cpu, vm, 2),
3849 aarch64_get_vec_float (cpu, vm, 3)));
3850 }
3851 }
3852
3853 static void
3854 do_vec_AND (sim_cpu *cpu)
3855 {
3856 /* instr[31] = 0
3857 instr[30] = half (0)/full (1)
3858 instr[29,21] = 001110001
3859 instr[20,16] = Vm
3860 instr[15,10] = 000111
3861 instr[9,5] = Vn
3862 instr[4,0] = Vd. */
3863
3864 unsigned vm = INSTR (20, 16);
3865 unsigned vn = INSTR (9, 5);
3866 unsigned vd = INSTR (4, 0);
3867 unsigned i;
3868 int full = INSTR (30, 30);
3869
3870 NYI_assert (29, 21, 0x071);
3871 NYI_assert (15, 10, 0x07);
3872
3873 for (i = 0; i < (full ? 4 : 2); i++)
3874 aarch64_set_vec_u32 (cpu, vd, i,
3875 aarch64_get_vec_u32 (cpu, vn, i)
3876 & aarch64_get_vec_u32 (cpu, vm, i));
3877 }
3878
3879 static void
3880 do_vec_BSL (sim_cpu *cpu)
3881 {
3882 /* instr[31] = 0
3883 instr[30] = half (0)/full (1)
3884 instr[29,21] = 101110011
3885 instr[20,16] = Vm
3886 instr[15,10] = 000111
3887 instr[9,5] = Vn
3888 instr[4,0] = Vd. */
3889
3890 unsigned vm = INSTR (20, 16);
3891 unsigned vn = INSTR (9, 5);
3892 unsigned vd = INSTR (4, 0);
3893 unsigned i;
3894 int full = INSTR (30, 30);
3895
3896 NYI_assert (29, 21, 0x173);
3897 NYI_assert (15, 10, 0x07);
3898
3899 for (i = 0; i < (full ? 16 : 8); i++)
3900 aarch64_set_vec_u8 (cpu, vd, i,
3901 ( aarch64_get_vec_u8 (cpu, vd, i)
3902 & aarch64_get_vec_u8 (cpu, vn, i))
3903 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
3904 & aarch64_get_vec_u8 (cpu, vm, i)));
3905 }
3906
3907 static void
3908 do_vec_EOR (sim_cpu *cpu)
3909 {
3910 /* instr[31] = 0
3911 instr[30] = half (0)/full (1)
3912 instr[29,21] = 10 1110 001
3913 instr[20,16] = Vm
3914 instr[15,10] = 000111
3915 instr[9,5] = Vn
3916 instr[4,0] = Vd. */
3917
3918 unsigned vm = INSTR (20, 16);
3919 unsigned vn = INSTR (9, 5);
3920 unsigned vd = INSTR (4, 0);
3921 unsigned i;
3922 int full = INSTR (30, 30);
3923
3924 NYI_assert (29, 21, 0x171);
3925 NYI_assert (15, 10, 0x07);
3926
3927 for (i = 0; i < (full ? 4 : 2); i++)
3928 aarch64_set_vec_u32 (cpu, vd, i,
3929 aarch64_get_vec_u32 (cpu, vn, i)
3930 ^ aarch64_get_vec_u32 (cpu, vm, i));
3931 }
3932
3933 static void
3934 do_vec_bit (sim_cpu *cpu)
3935 {
3936 /* instr[31] = 0
3937 instr[30] = half (0)/full (1)
3938 instr[29,23] = 10 1110 1
3939 instr[22] = BIT (0) / BIF (1)
3940 instr[21] = 1
3941 instr[20,16] = Vm
3942 instr[15,10] = 0001 11
3943 instr[9,5] = Vn
3944 instr[4,0] = Vd. */
3945
3946 unsigned vm = INSTR (20, 16);
3947 unsigned vn = INSTR (9, 5);
3948 unsigned vd = INSTR (4, 0);
3949 unsigned full = INSTR (30, 30);
3950 unsigned test_false = INSTR (22, 22);
3951 unsigned i;
3952
3953 NYI_assert (29, 23, 0x5D);
3954 NYI_assert (21, 21, 1);
3955 NYI_assert (15, 10, 0x07);
3956
3957 for (i = 0; i < (full ? 2 : 1); i++)
3958 {
3959 uint64_t vd_val = aarch64_get_vec_u64 (cpu, vd, i);
3960 uint64_t vn_val = aarch64_get_vec_u64 (cpu, vn, i);
3961 uint64_t vm_val = aarch64_get_vec_u64 (cpu, vm, i);
3962 /* BIT inserts the Vn bits selected by the Vm mask,
3963 BIF inserts them where the Vm mask is clear. */
3964 if (test_false)
3965 aarch64_set_vec_u64 (cpu, vd, i, (vd_val & vm_val) | (vn_val & ~vm_val));
3966 else
3967 aarch64_set_vec_u64 (cpu, vd, i, (vd_val & ~vm_val) | (vn_val & vm_val));
3968 }
3969 }
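
/* E.g. for BIT with 16-bit lane values Vd = 0x00FF, Vn = 0x1234 and
   mask Vm = 0x0F0F, the result is (0x00FF & ~0x0F0F) | (0x1234 & 0x0F0F)
   = 0x02F4; BIF performs the same insert with the mask inverted.  */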
3970
3971 static void
3972 do_vec_ORN (sim_cpu *cpu)
3973 {
3974 /* instr[31] = 0
3975 instr[30] = half (0)/full (1)
3976 instr[29,21] = 00 1110 111
3977 instr[20,16] = Vm
3978 instr[15,10] = 00 0111
3979 instr[9,5] = Vn
3980 instr[4,0] = Vd. */
3981
3982 unsigned vm = INSTR (20, 16);
3983 unsigned vn = INSTR (9, 5);
3984 unsigned vd = INSTR (4, 0);
3985 unsigned i;
3986 int full = INSTR (30, 30);
3987
3988 NYI_assert (29, 21, 0x077);
3989 NYI_assert (15, 10, 0x07);
3990
3991 for (i = 0; i < (full ? 16 : 8); i++)
3992 aarch64_set_vec_u8 (cpu, vd, i,
3993 aarch64_get_vec_u8 (cpu, vn, i)
3994 | ~ aarch64_get_vec_u8 (cpu, vm, i));
3995 }
3996
3997 static void
3998 do_vec_ORR (sim_cpu *cpu)
3999 {
4000 /* instr[31] = 0
4001 instr[30] = half (0)/full (1)
4002 instr[29,21] = 00 1110 101
4003 instr[20,16] = Vm
4004 instr[15,10] = 0001 11
4005 instr[9,5] = Vn
4006 instr[4,0] = Vd. */
4007
4008 unsigned vm = INSTR (20, 16);
4009 unsigned vn = INSTR (9, 5);
4010 unsigned vd = INSTR (4, 0);
4011 unsigned i;
4012 int full = INSTR (30, 30);
4013
4014 NYI_assert (29, 21, 0x075);
4015 NYI_assert (15, 10, 0x07);
4016
4017 for (i = 0; i < (full ? 16 : 8); i++)
4018 aarch64_set_vec_u8 (cpu, vd, i,
4019 aarch64_get_vec_u8 (cpu, vn, i)
4020 | aarch64_get_vec_u8 (cpu, vm, i));
4021 }
4022
4023 static void
4024 do_vec_BIC (sim_cpu *cpu)
4025 {
4026 /* instr[31] = 0
4027 instr[30] = half (0)/full (1)
4028 instr[29,21] = 00 1110 011
4029 instr[20,16] = Vm
4030 instr[15,10] = 00 0111
4031 instr[9,5] = Vn
4032 instr[4,0] = Vd. */
4033
4034 unsigned vm = INSTR (20, 16);
4035 unsigned vn = INSTR (9, 5);
4036 unsigned vd = INSTR (4, 0);
4037 unsigned i;
4038 int full = INSTR (30, 30);
4039
4040 NYI_assert (29, 21, 0x073);
4041 NYI_assert (15, 10, 0x07);
4042
4043 for (i = 0; i < (full ? 16 : 8); i++)
4044 aarch64_set_vec_u8 (cpu, vd, i,
4045 aarch64_get_vec_u8 (cpu, vn, i)
4046 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4047 }
4048
4049 static void
4050 do_vec_XTN (sim_cpu *cpu)
4051 {
4052 /* instr[31] = 0
4053 instr[30] = first part (0)/ second part (1)
4054 instr[29,24] = 00 1110
4055 instr[23,22] = size: byte(00), half(01), word (10)
4056 instr[21,10] = 1000 0100 1010
4057 instr[9,5] = Vs
4058 instr[4,0] = Vd. */
4059
4060 unsigned vs = INSTR (9, 5);
4061 unsigned vd = INSTR (4, 0);
4062 unsigned bias = INSTR (30, 30);
4063 unsigned i;
4064
4065 NYI_assert (29, 24, 0x0E);
4066 NYI_assert (21, 10, 0x84A);
4067
4068 switch (INSTR (23, 22))
4069 {
4070 case 0:
4071 if (bias)
4072 for (i = 0; i < 8; i++)
4073 aarch64_set_vec_u8 (cpu, vd, i + 8,
4074 aarch64_get_vec_u16 (cpu, vs, i));
4075 else
4076 for (i = 0; i < 8; i++)
4077 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4078 return;
4079
4080 case 1:
4081 if (bias)
4082 for (i = 0; i < 4; i++)
4083 aarch64_set_vec_u16 (cpu, vd, i + 4,
4084 aarch64_get_vec_u32 (cpu, vs, i));
4085 else
4086 for (i = 0; i < 4; i++)
4087 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4088 return;
4089
4090 case 2:
4091 if (bias)
4092 for (i = 0; i < 2; i++)
4093 aarch64_set_vec_u32 (cpu, vd, i + 2,
4094 aarch64_get_vec_u64 (cpu, vs, i));
4095 else
4096 for (i = 0; i < 2; i++)
4097 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4098 return;
4099 }
4100 }
4101
4102 static void
4103 do_vec_maxv (sim_cpu *cpu)
4104 {
4105 /* instr[31] = 0
4106 instr[30] = half(0)/full(1)
4107 instr[29] = signed (0)/unsigned(1)
4108 instr[28,24] = 0 1110
4109 instr[23,22] = size: byte(00), half(01), word (10)
4110 instr[21] = 1
4111 instr[20,17] = 1000
4112 instr[16] = max(0)/min(1)
4113 instr[15,10] = 1010 10
4114 instr[9,5] = V source
4115 instr[4,0] = R dest. */
4116
4117 unsigned vs = INSTR (9, 5);
4118 unsigned rd = INSTR (4, 0);
4119 unsigned full = INSTR (30, 30);
4120 unsigned i;
4121
4122 NYI_assert (28, 24, 0x0E);
4123 NYI_assert (21, 21, 1);
4124 NYI_assert (20, 17, 8);
4125 NYI_assert (15, 10, 0x2A);
4126
4127 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4128 {
4129 case 0: /* SMAXV. */
4130 {
4131 int64_t smax;
4132 switch (INSTR (23, 22))
4133 {
4134 case 0:
4135 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4136 for (i = 1; i < (full ? 16 : 8); i++)
4137 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4138 break;
4139 case 1:
4140 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4141 for (i = 1; i < (full ? 8 : 4); i++)
4142 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4143 break;
4144 case 2:
4145 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4146 for (i = 1; i < (full ? 4 : 2); i++)
4147 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4148 break;
4149 case 3:
4150 HALT_UNALLOC;
4151 }
4152 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4153 return;
4154 }
4155
4156 case 1: /* SMINV. */
4157 {
4158 int64_t smin;
4159 switch (INSTR (23, 22))
4160 {
4161 case 0:
4162 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4163 for (i = 1; i < (full ? 16 : 8); i++)
4164 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4165 break;
4166 case 1:
4167 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4168 for (i = 1; i < (full ? 8 : 4); i++)
4169 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4170 break;
4171 case 2:
4172 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4173 for (i = 1; i < (full ? 4 : 2); i++)
4174 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4175 break;
4176
4177 case 3:
4178 HALT_UNALLOC;
4179 }
4180 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4181 return;
4182 }
4183
4184 case 2: /* UMAXV. */
4185 {
4186 uint64_t umax;
4187 switch (INSTR (23, 22))
4188 {
4189 case 0:
4190 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4191 for (i = 1; i < (full ? 16 : 8); i++)
4192 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4193 break;
4194 case 1:
4195 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4196 for (i = 1; i < (full ? 8 : 4); i++)
4197 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4198 break;
4199 case 2:
4200 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4201 for (i = 1; i < (full ? 4 : 2); i++)
4202 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4203 break;
4204
4205 case 3:
4206 HALT_UNALLOC;
4207 }
4208 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4209 return;
4210 }
4211
4212 case 3: /* UMINV. */
4213 {
4214 uint64_t umin;
4215 switch (INSTR (23, 22))
4216 {
4217 case 0:
4218 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4219 for (i = 1; i < (full ? 16 : 8); i++)
4220 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4221 break;
4222 case 1:
4223 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4224 for (i = 1; i < (full ? 8 : 4); i++)
4225 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4226 break;
4227 case 2:
4228 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4229 for (i = 1; i < (full ? 4 : 2); i++)
4230 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4231 break;
4232
4233 case 3:
4234 HALT_UNALLOC;
4235 }
4236 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4237 return;
4238 }
4239 }
4240 }
4241
4242 static void
4243 do_vec_fminmaxV (sim_cpu *cpu)
4244 {
4245 /* instr[31,24] = 0110 1110
4246 instr[23] = max(0)/min(1)
4247 instr[22,14] = 011 0000 11
4248 instr[13,12] = nm(00)/normal(11)
4249 instr[11,10] = 10
4250 instr[9,5] = V source
4251 instr[4,0] = R dest. */
4252
4253 unsigned vs = INSTR (9, 5);
4254 unsigned rd = INSTR (4, 0);
4255 unsigned i;
4256 float res = aarch64_get_vec_float (cpu, vs, 0);
4257
4258 NYI_assert (31, 24, 0x6E);
4259 NYI_assert (22, 14, 0x0C3);
4260 NYI_assert (11, 10, 2);
4261
4262 if (INSTR (23, 23))
4263 {
4264 switch (INSTR (13, 12))
4265 {
4266 case 0: /* FMINNMV. */
4267 for (i = 1; i < 4; i++)
4268 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4269 break;
4270
4271 case 3: /* FMINV. */
4272 for (i = 1; i < 4; i++)
4273 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4274 break;
4275
4276 default:
4277 HALT_NYI;
4278 }
4279 }
4280 else
4281 {
4282 switch (INSTR (13, 12))
4283 {
4284 case 0: /* FMAXNMV. */
4285 for (i = 1; i < 4; i++)
4286 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4287 break;
4288
4289 case 3: /* FMAXV. */
4290 for (i = 1; i < 4; i++)
4291 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4292 break;
4293
4294 default:
4295 HALT_NYI;
4296 }
4297 }
4298
4299 aarch64_set_FP_float (cpu, rd, res);
4300 }
4301
4302 static void
4303 do_vec_Fminmax (sim_cpu *cpu)
4304 {
4305 /* instr[31] = 0
4306 instr[30] = half(0)/full(1)
4307 instr[29,24] = 00 1110
4308 instr[23] = max(0)/min(1)
4309 instr[22] = float(0)/double(1)
4310 instr[21] = 1
4311 instr[20,16] = Vm
4312 instr[15,14] = 11
4313 instr[13,12] = nm(00)/normal(11)
4314 instr[11,10] = 01
4315 instr[9,5] = Vn
4316 instr[4,0] = Vd. */
4317
4318 unsigned vm = INSTR (20, 16);
4319 unsigned vn = INSTR (9, 5);
4320 unsigned vd = INSTR (4, 0);
4321 unsigned full = INSTR (30, 30);
4322 unsigned min = INSTR (23, 23);
4323 unsigned i;
4324
4325 NYI_assert (29, 24, 0x0E);
4326 NYI_assert (21, 21, 1);
4327 NYI_assert (15, 14, 3);
4328 NYI_assert (11, 10, 1);
4329
4330 if (INSTR (22, 22))
4331 {
4332 double (* func)(double, double);
4333
4334 if (! full)
4335 HALT_NYI;
4336
4337 if (INSTR (13, 12) == 0)
4338 func = min ? dminnm : dmaxnm;
4339 else if (INSTR (13, 12) == 3)
4340 func = min ? fmin : fmax;
4341 else
4342 HALT_NYI;
4343
4344 for (i = 0; i < 2; i++)
4345 aarch64_set_vec_double (cpu, vd, i,
4346 func (aarch64_get_vec_double (cpu, vn, i),
4347 aarch64_get_vec_double (cpu, vm, i)));
4348 }
4349 else
4350 {
4351 float (* func)(float, float);
4352
4353 if (INSTR (13, 12) == 0)
4354 func = min ? fminnm : fmaxnm;
4355 else if (INSTR (13, 12) == 3)
4356 func = min ? fminf : fmaxf;
4357 else
4358 HALT_NYI;
4359
4360 for (i = 0; i < (full ? 4 : 2); i++)
4361 aarch64_set_vec_float (cpu, vd, i,
4362 func (aarch64_get_vec_float (cpu, vn, i),
4363 aarch64_get_vec_float (cpu, vm, i)));
4364 }
4365 }
4366
4367 static void
4368 do_vec_SCVTF (sim_cpu *cpu)
4369 {
4370 /* instr[31] = 0
4371 instr[30] = Q
4372 instr[29,23] = 00 1110 0
4373 instr[22] = float(0)/double(1)
4374 instr[21,10] = 10 0001 1101 10
4375 instr[9,5] = Vn
4376 instr[4,0] = Vd. */
4377
4378 unsigned vn = INSTR (9, 5);
4379 unsigned vd = INSTR (4, 0);
4380 unsigned full = INSTR (30, 30);
4381 unsigned size = INSTR (22, 22);
4382 unsigned i;
4383
4384 NYI_assert (29, 23, 0x1C);
4385 NYI_assert (21, 10, 0x876);
4386
4387 if (size)
4388 {
4389 if (! full)
4390 HALT_UNALLOC;
4391
4392 for (i = 0; i < 2; i++)
4393 {
4394 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4395 aarch64_set_vec_double (cpu, vd, i, val);
4396 }
4397 }
4398 else
4399 {
4400 for (i = 0; i < (full ? 4 : 2); i++)
4401 {
4402 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4403 aarch64_set_vec_float (cpu, vd, i, val);
4404 }
4405 }
4406 }
4407
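/* Element-wise vector compare helpers.  Each lane of Vd is written
   with all ones (-1) when the comparison holds and with zero
   otherwise, matching the mask results the AdvSIMD compare
   instructions produce.  The *0 variants compare against zero and
   the F variants compare floating-point lanes.  */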
4408 #define VEC_CMP(SOURCE, CMP) \
4409 do \
4410 { \
4411 switch (size) \
4412 { \
4413 case 0: \
4414 for (i = 0; i < (full ? 16 : 8); i++) \
4415 aarch64_set_vec_u8 (cpu, vd, i, \
4416 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4417 CMP \
4418 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4419 ? -1 : 0); \
4420 return; \
4421 case 1: \
4422 for (i = 0; i < (full ? 8 : 4); i++) \
4423 aarch64_set_vec_u16 (cpu, vd, i, \
4424 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4425 CMP \
4426 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4427 ? -1 : 0); \
4428 return; \
4429 case 2: \
4430 for (i = 0; i < (full ? 4 : 2); i++) \
4431 aarch64_set_vec_u32 (cpu, vd, i, \
4432 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4433 CMP \
4434 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4435 ? -1 : 0); \
4436 return; \
4437 case 3: \
4438 if (! full) \
4439 HALT_UNALLOC; \
4440 for (i = 0; i < 2; i++) \
4441 aarch64_set_vec_u64 (cpu, vd, i, \
4442 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4443 CMP \
4444 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4445 ? -1ULL : 0); \
4446 return; \
4447 } \
4448 } \
4449 while (0)
4450
4451 #define VEC_CMP0(SOURCE, CMP) \
4452 do \
4453 { \
4454 switch (size) \
4455 { \
4456 case 0: \
4457 for (i = 0; i < (full ? 16 : 8); i++) \
4458 aarch64_set_vec_u8 (cpu, vd, i, \
4459 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4460 CMP 0 ? -1 : 0); \
4461 return; \
4462 case 1: \
4463 for (i = 0; i < (full ? 8 : 4); i++) \
4464 aarch64_set_vec_u16 (cpu, vd, i, \
4465 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4466 CMP 0 ? -1 : 0); \
4467 return; \
4468 case 2: \
4469 for (i = 0; i < (full ? 4 : 2); i++) \
4470 aarch64_set_vec_u32 (cpu, vd, i, \
4471 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4472 CMP 0 ? -1 : 0); \
4473 return; \
4474 case 3: \
4475 if (! full) \
4476 HALT_UNALLOC; \
4477 for (i = 0; i < 2; i++) \
4478 aarch64_set_vec_u64 (cpu, vd, i, \
4479 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4480 CMP 0 ? -1ULL : 0); \
4481 return; \
4482 } \
4483 } \
4484 while (0)
4485
4486 #define VEC_FCMP0(CMP) \
4487 do \
4488 { \
4489 if (vm != 0) \
4490 HALT_NYI; \
4491 if (INSTR (22, 22)) \
4492 { \
4493 if (! full) \
4494 HALT_NYI; \
4495 for (i = 0; i < 2; i++) \
4496 aarch64_set_vec_u64 (cpu, vd, i, \
4497 aarch64_get_vec_double (cpu, vn, i) \
4498 CMP 0.0 ? -1 : 0); \
4499 } \
4500 else \
4501 { \
4502 for (i = 0; i < (full ? 4 : 2); i++) \
4503 aarch64_set_vec_u32 (cpu, vd, i, \
4504 aarch64_get_vec_float (cpu, vn, i) \
4505 CMP 0.0 ? -1 : 0); \
4506 } \
4507 return; \
4508 } \
4509 while (0)
4510
4511 #define VEC_FCMP(CMP) \
4512 do \
4513 { \
4514 if (INSTR (22, 22)) \
4515 { \
4516 if (! full) \
4517 HALT_NYI; \
4518 for (i = 0; i < 2; i++) \
4519 aarch64_set_vec_u64 (cpu, vd, i, \
4520 aarch64_get_vec_double (cpu, vn, i) \
4521 CMP \
4522 aarch64_get_vec_double (cpu, vm, i) \
4523 ? -1 : 0); \
4524 } \
4525 else \
4526 { \
4527 for (i = 0; i < (full ? 4 : 2); i++) \
4528 aarch64_set_vec_u32 (cpu, vd, i, \
4529 aarch64_get_vec_float (cpu, vn, i) \
4530 CMP \
4531 aarch64_get_vec_float (cpu, vm, i) \
4532 ? -1 : 0); \
4533 } \
4534 return; \
4535 } \
4536 while (0)
4537
4538 static void
4539 do_vec_compare (sim_cpu *cpu)
4540 {
4541 /* instr[31] = 0
4542 instr[30] = half(0)/full(1)
4543 instr[29] = part-of-comparison-type
4544 instr[28,24] = 0 1110
4545 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4546 type of float compares: single (-0) / double (-1)
4547 instr[21] = 1
4548 instr[20,16] = Vm or 00000 (compare vs 0)
4549 instr[15,10] = part-of-comparison-type
4550 instr[9,5] = Vn
4551 instr[4,0] = Vd. */
4552
4553 int full = INSTR (30, 30);
4554 int size = INSTR (23, 22);
4555 unsigned vm = INSTR (20, 16);
4556 unsigned vn = INSTR (9, 5);
4557 unsigned vd = INSTR (4, 0);
4558 unsigned i;
4559
4560 NYI_assert (28, 24, 0x0E);
4561 NYI_assert (21, 21, 1);
4562
4563 if ((INSTR (11, 11)
4564 && INSTR (14, 14))
4565 || ((INSTR (11, 11) == 0
4566 && INSTR (10, 10) == 0)))
4567 {
4568 /* A compare vs 0. */
4569 if (vm != 0)
4570 {
4571 if (INSTR (15, 10) == 0x2A)
4572 do_vec_maxv (cpu);
4573 else if (INSTR (15, 10) == 0x32
4574 || INSTR (15, 10) == 0x3E)
4575 do_vec_fminmaxV (cpu);
4576 else if (INSTR (29, 23) == 0x1C
4577 && INSTR (21, 10) == 0x876)
4578 do_vec_SCVTF (cpu);
4579 else
4580 HALT_NYI;
4581 return;
4582 }
4583 }
4584
4585 if (INSTR (14, 14))
4586 {
4587 /* A floating point compare. */
4588 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4589 | INSTR (13, 10);
4590
4591 NYI_assert (15, 15, 1);
4592
4593 switch (decode)
4594 {
4595 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4596 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4597 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4598 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4599 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4600 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4601 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4602 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4603
4604 default:
4605 HALT_NYI;
4606 }
4607 }
4608 else
4609 {
4610 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4611
4612 switch (decode)
4613 {
4614 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4615 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4616 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4617 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4618 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4619 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4620 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4621 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4622 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4623 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4624 default:
4625 if (vm == 0)
4626 HALT_NYI;
4627 do_vec_maxv (cpu);
4628 }
4629 }
4630 }
4631
4632 static void
4633 do_vec_SSHL (sim_cpu *cpu)
4634 {
4635 /* instr[31] = 0
4636 instr[30] = half (0)/full (1)
4637 instr[29,24] = 00 1110
4638 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4639 instr[21] = 1
4640 instr[20,16] = Vm
4641 instr[15,10] = 0100 01
4642 instr[9,5] = Vn
4643 instr[4,0] = Vd. */
4644
4645 unsigned full = INSTR (30, 30);
4646 unsigned vm = INSTR (20, 16);
4647 unsigned vn = INSTR (9, 5);
4648 unsigned vd = INSTR (4, 0);
4649 unsigned i;
4650 signed int shift;
4651
4652 NYI_assert (29, 24, 0x0E);
4653 NYI_assert (21, 21, 1);
4654 NYI_assert (15, 10, 0x11);
4655
4656 /* The shift amount is the signed value held in the low-order byte of
4657 each element of Vm; a negative amount shifts right instead. */
4657
4658 switch (INSTR (23, 22))
4659 {
4660 case 0:
4661 for (i = 0; i < (full ? 16 : 8); i++)
4662 {
4663 shift = aarch64_get_vec_s8 (cpu, vm, i);
4664 if (shift >= 0)
4665 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4666 << shift);
4667 else
4668 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4669 >> - shift);
4670 }
4671 return;
4672
4673 case 1:
4674 for (i = 0; i < (full ? 8 : 4); i++)
4675 {
4676 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4677 if (shift >= 0)
4678 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4679 << shift);
4680 else
4681 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4682 >> - shift);
4683 }
4684 return;
4685
4686 case 2:
4687 for (i = 0; i < (full ? 4 : 2); i++)
4688 {
4689 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4690 if (shift >= 0)
4691 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4692 << shift);
4693 else
4694 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4695 >> - shift);
4696 }
4697 return;
4698
4699 case 3:
4700 if (! full)
4701 HALT_UNALLOC;
4702 for (i = 0; i < 2; i++)
4703 {
4704 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4705 if (shift >= 0)
4706 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4707 << shift);
4708 else
4709 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4710 >> - shift);
4711 }
4712 return;
4713 }
4714 }
4715
4716 static void
4717 do_vec_USHL (sim_cpu *cpu)
4718 {
4719 /* instr[31] = 0
4720 instr[30] = half (0)/full (1)
4721 instr[29,24] = 10 1110
4722 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4723 instr[21] = 1
4724 instr[20,16] = Vm
4725 instr[15,10] = 0100 01
4726 instr[9,5] = Vn
4727 instr[4,0] = Vd */
4728
4729 unsigned full = INSTR (30, 30);
4730 unsigned vm = INSTR (20, 16);
4731 unsigned vn = INSTR (9, 5);
4732 unsigned vd = INSTR (4, 0);
4733 unsigned i;
4734 signed int shift;
4735
4736 NYI_assert (29, 24, 0x2E);
4737 NYI_assert (15, 10, 0x11);
4738
4739 switch (INSTR (23, 22))
4740 {
4741 case 0:
4742 for (i = 0; i < (full ? 16 : 8); i++)
4743 {
4744 shift = aarch64_get_vec_s8 (cpu, vm, i);
4745 if (shift >= 0)
4746 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4747 << shift);
4748 else
4749 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4750 >> - shift);
4751 }
4752 return;
4753
4754 case 1:
4755 for (i = 0; i < (full ? 8 : 4); i++)
4756 {
4757 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4758 if (shift >= 0)
4759 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4760 << shift);
4761 else
4762 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4763 >> - shift);
4764 }
4765 return;
4766
4767 case 2:
4768 for (i = 0; i < (full ? 4 : 2); i++)
4769 {
4770 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4771 if (shift >= 0)
4772 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4773 << shift);
4774 else
4775 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4776 >> - shift);
4777 }
4778 return;
4779
4780 case 3:
4781 if (! full)
4782 HALT_UNALLOC;
4783 for (i = 0; i < 2; i++)
4784 {
4785 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4786 if (shift >= 0)
4787 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4788 << shift);
4789 else
4790 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4791 >> - shift);
4792 }
4793 return;
4794 }
4795 }
4796
4797 static void
4798 do_vec_FMLA (sim_cpu *cpu)
4799 {
4800 /* instr[31] = 0
4801 instr[30] = full/half selector
4802 instr[29,23] = 0011100
4803 instr[22] = size: 0=>float, 1=>double
4804 instr[21] = 1
4805 instr[20,16] = Vm
4806 instr[15,10] = 1100 11
4807 instr[9,5] = Vn
4808 instr[4,0] = Vd. */
4809
4810 unsigned vm = INSTR (20, 16);
4811 unsigned vn = INSTR (9, 5);
4812 unsigned vd = INSTR (4, 0);
4813 unsigned i;
4814 int full = INSTR (30, 30);
4815
4816 NYI_assert (29, 23, 0x1C);
4817 NYI_assert (21, 21, 1);
4818 NYI_assert (15, 10, 0x33);
4819
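/* Note: the multiply and the add below round separately; an exact
   model of FMLA would use the fused fma/fmaf routines.  */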
4820 if (INSTR (22, 22))
4821 {
4822 if (! full)
4823 HALT_UNALLOC;
4824 for (i = 0; i < 2; i++)
4825 aarch64_set_vec_double (cpu, vd, i,
4826 aarch64_get_vec_double (cpu, vn, i) *
4827 aarch64_get_vec_double (cpu, vm, i) +
4828 aarch64_get_vec_double (cpu, vd, i));
4829 }
4830 else
4831 {
4832 for (i = 0; i < (full ? 4 : 2); i++)
4833 aarch64_set_vec_float (cpu, vd, i,
4834 aarch64_get_vec_float (cpu, vn, i) *
4835 aarch64_get_vec_float (cpu, vm, i) +
4836 aarch64_get_vec_float (cpu, vd, i));
4837 }
4838 }
4839
4840 static void
4841 do_vec_max (sim_cpu *cpu)
4842 {
4843 /* instr[31] = 0
4844 instr[30] = full/half selector
4845 instr[29] = SMAX (0) / UMAX (1)
4846 instr[28,24] = 0 1110
4847 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4848 instr[21] = 1
4849 instr[20,16] = Vm
4850 instr[15,10] = 0110 01
4851 instr[9,5] = Vn
4852 instr[4,0] = Vd. */
4853
4854 unsigned vm = INSTR (20, 16);
4855 unsigned vn = INSTR (9, 5);
4856 unsigned vd = INSTR (4, 0);
4857 unsigned i;
4858 int full = INSTR (30, 30);
4859
4860 NYI_assert (28, 24, 0x0E);
4861 NYI_assert (21, 21, 1);
4862 NYI_assert (15, 10, 0x19);
4863
4864 if (INSTR (29, 29))
4865 {
4866 switch (INSTR (23, 22))
4867 {
4868 case 0:
4869 for (i = 0; i < (full ? 16 : 8); i++)
4870 aarch64_set_vec_u8 (cpu, vd, i,
4871 aarch64_get_vec_u8 (cpu, vn, i)
4872 > aarch64_get_vec_u8 (cpu, vm, i)
4873 ? aarch64_get_vec_u8 (cpu, vn, i)
4874 : aarch64_get_vec_u8 (cpu, vm, i));
4875 return;
4876
4877 case 1:
4878 for (i = 0; i < (full ? 8 : 4); i++)
4879 aarch64_set_vec_u16 (cpu, vd, i,
4880 aarch64_get_vec_u16 (cpu, vn, i)
4881 > aarch64_get_vec_u16 (cpu, vm, i)
4882 ? aarch64_get_vec_u16 (cpu, vn, i)
4883 : aarch64_get_vec_u16 (cpu, vm, i));
4884 return;
4885
4886 case 2:
4887 for (i = 0; i < (full ? 4 : 2); i++)
4888 aarch64_set_vec_u32 (cpu, vd, i,
4889 aarch64_get_vec_u32 (cpu, vn, i)
4890 > aarch64_get_vec_u32 (cpu, vm, i)
4891 ? aarch64_get_vec_u32 (cpu, vn, i)
4892 : aarch64_get_vec_u32 (cpu, vm, i));
4893 return;
4894
4895 case 3:
4896 HALT_UNALLOC;
4897 }
4898 }
4899 else
4900 {
4901 switch (INSTR (23, 22))
4902 {
4903 case 0:
4904 for (i = 0; i < (full ? 16 : 8); i++)
4905 aarch64_set_vec_s8 (cpu, vd, i,
4906 aarch64_get_vec_s8 (cpu, vn, i)
4907 > aarch64_get_vec_s8 (cpu, vm, i)
4908 ? aarch64_get_vec_s8 (cpu, vn, i)
4909 : aarch64_get_vec_s8 (cpu, vm, i));
4910 return;
4911
4912 case 1:
4913 for (i = 0; i < (full ? 8 : 4); i++)
4914 aarch64_set_vec_s16 (cpu, vd, i,
4915 aarch64_get_vec_s16 (cpu, vn, i)
4916 > aarch64_get_vec_s16 (cpu, vm, i)
4917 ? aarch64_get_vec_s16 (cpu, vn, i)
4918 : aarch64_get_vec_s16 (cpu, vm, i));
4919 return;
4920
4921 case 2:
4922 for (i = 0; i < (full ? 4 : 2); i++)
4923 aarch64_set_vec_s32 (cpu, vd, i,
4924 aarch64_get_vec_s32 (cpu, vn, i)
4925 > aarch64_get_vec_s32 (cpu, vm, i)
4926 ? aarch64_get_vec_s32 (cpu, vn, i)
4927 : aarch64_get_vec_s32 (cpu, vm, i));
4928 return;
4929
4930 case 3:
4931 HALT_UNALLOC;
4932 }
4933 }
4934 }
4935
4936 static void
4937 do_vec_min (sim_cpu *cpu)
4938 {
4939 /* instr[31] = 0
4940 instr[30] = full/half selector
4941 instr[29] = SMIN (0) / UMIN (1)
4942 instr[28,24] = 0 1110
4943 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4944 instr[21] = 1
4945 instr[20,16] = Vm
4946 instr[15,10] = 0110 11
4947 instr[9,5] = Vn
4948 instr[4,0] = Vd. */
4949
4950 unsigned vm = INSTR (20, 16);
4951 unsigned vn = INSTR (9, 5);
4952 unsigned vd = INSTR (4, 0);
4953 unsigned i;
4954 int full = INSTR (30, 30);
4955
4956 NYI_assert (28, 24, 0x0E);
4957 NYI_assert (21, 21, 1);
4958 NYI_assert (15, 10, 0x1B);
4959
4960 if (INSTR (29, 29))
4961 {
4962 switch (INSTR (23, 22))
4963 {
4964 case 0:
4965 for (i = 0; i < (full ? 16 : 8); i++)
4966 aarch64_set_vec_u8 (cpu, vd, i,
4967 aarch64_get_vec_u8 (cpu, vn, i)
4968 < aarch64_get_vec_u8 (cpu, vm, i)
4969 ? aarch64_get_vec_u8 (cpu, vn, i)
4970 : aarch64_get_vec_u8 (cpu, vm, i));
4971 return;
4972
4973 case 1:
4974 for (i = 0; i < (full ? 8 : 4); i++)
4975 aarch64_set_vec_u16 (cpu, vd, i,
4976 aarch64_get_vec_u16 (cpu, vn, i)
4977 < aarch64_get_vec_u16 (cpu, vm, i)
4978 ? aarch64_get_vec_u16 (cpu, vn, i)
4979 : aarch64_get_vec_u16 (cpu, vm, i));
4980 return;
4981
4982 case 2:
4983 for (i = 0; i < (full ? 4 : 2); i++)
4984 aarch64_set_vec_u32 (cpu, vd, i,
4985 aarch64_get_vec_u32 (cpu, vn, i)
4986 < aarch64_get_vec_u32 (cpu, vm, i)
4987 ? aarch64_get_vec_u32 (cpu, vn, i)
4988 : aarch64_get_vec_u32 (cpu, vm, i));
4989 return;
4990
4991 case 3:
4992 HALT_UNALLOC;
4993 }
4994 }
4995 else
4996 {
4997 switch (INSTR (23, 22))
4998 {
4999 case 0:
5000 for (i = 0; i < (full ? 16 : 8); i++)
5001 aarch64_set_vec_s8 (cpu, vd, i,
5002 aarch64_get_vec_s8 (cpu, vn, i)
5003 < aarch64_get_vec_s8 (cpu, vm, i)
5004 ? aarch64_get_vec_s8 (cpu, vn, i)
5005 : aarch64_get_vec_s8 (cpu, vm, i));
5006 return;
5007
5008 case 1:
5009 for (i = 0; i < (full ? 8 : 4); i++)
5010 aarch64_set_vec_s16 (cpu, vd, i,
5011 aarch64_get_vec_s16 (cpu, vn, i)
5012 < aarch64_get_vec_s16 (cpu, vm, i)
5013 ? aarch64_get_vec_s16 (cpu, vn, i)
5014 : aarch64_get_vec_s16 (cpu, vm, i));
5015 return;
5016
5017 case 2:
5018 for (i = 0; i < (full ? 4 : 2); i++)
5019 aarch64_set_vec_s32 (cpu, vd, i,
5020 aarch64_get_vec_s32 (cpu, vn, i)
5021 < aarch64_get_vec_s32 (cpu, vm, i)
5022 ? aarch64_get_vec_s32 (cpu, vn, i)
5023 : aarch64_get_vec_s32 (cpu, vm, i));
5024 return;
5025
5026 case 3:
5027 HALT_UNALLOC;
5028 }
5029 }
5030 }
5031
5032 static void
5033 do_vec_sub_long (sim_cpu *cpu)
5034 {
5035 /* instr[31] = 0
5036 instr[30] = lower (0) / upper (1)
5037 instr[29] = signed (0) / unsigned (1)
5038 instr[28,24] = 0 1110
5039 instr[23,22] = size: bytes (00), half (01), word (10)
5040 instr[21] = 1
5041 instr[20,16] = Vm
5042 instr[15,10] = 0010 00
5043 instr[9,5] = Vn
5044 instr[4,0] = V dest. */
5045
5046 unsigned size = INSTR (23, 22);
5047 unsigned vm = INSTR (20, 16);
5048 unsigned vn = INSTR (9, 5);
5049 unsigned vd = INSTR (4, 0);
5050 unsigned bias = 0;
5051 unsigned i;
5052
5053 NYI_assert (28, 24, 0x0E);
5054 NYI_assert (21, 21, 1);
5055 NYI_assert (15, 10, 0x08);
5056
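/* For the "2" variants, BIAS is scaled below so that the loops read
   from the upper half of the source vectors.  */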
5057 if (size == 3)
5058 HALT_UNALLOC;
5059
5060 switch (INSTR (30, 29))
5061 {
5062 case 2: /* SSUBL2. */
5063 bias = 2;
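/* Fall through.  */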
5064 case 0: /* SSUBL. */
5065 switch (size)
5066 {
5067 case 0:
5068 bias *= 4;
5069 for (i = 0; i < 8; i++)
5070 aarch64_set_vec_s16 (cpu, vd, i,
5071 aarch64_get_vec_s8 (cpu, vn, i + bias)
5072 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5073 break;
5074
5075 case 1:
5076 bias *= 2;
5077 for (i = 0; i < 4; i++)
5078 aarch64_set_vec_s32 (cpu, vd, i,
5079 aarch64_get_vec_s16 (cpu, vn, i + bias)
5080 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5081 break;
5082
5083 case 2:
5084 for (i = 0; i < 2; i++)
5085 aarch64_set_vec_s64 (cpu, vd, i,
5086 aarch64_get_vec_s32 (cpu, vn, i + bias)
5087 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5088 break;
5089
5090 default:
5091 HALT_UNALLOC;
5092 }
5093 break;
5094
5095 case 3: /* USUBL2. */
5096 bias = 2;
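/* Fall through.  */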
5097 case 1: /* USUBL. */
5098 switch (size)
5099 {
5100 case 0:
5101 bias *= 4;
5102 for (i = 0; i < 8; i++)
5103 aarch64_set_vec_u16 (cpu, vd, i,
5104 aarch64_get_vec_u8 (cpu, vn, i + bias)
5105 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5106 break;
5107
5108 case 1:
5109 bias *= 2;
5110 for (i = 0; i < 4; i++)
5111 aarch64_set_vec_u32 (cpu, vd, i,
5112 aarch64_get_vec_u16 (cpu, vn, i + bias)
5113 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5114 break;
5115
5116 case 2:
5117 for (i = 0; i < 2; i++)
5118 aarch64_set_vec_u64 (cpu, vd, i,
5119 aarch64_get_vec_u32 (cpu, vn, i + bias)
5120 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5121 break;
5122
5123 default:
5124 HALT_UNALLOC;
5125 }
5126 break;
5127 }
5128 }
5129
5130 static void
5131 do_vec_ADDP (sim_cpu *cpu)
5132 {
5133 /* instr[31] = 0
5134 instr[30] = half(0)/full(1)
5135 instr[29,24] = 00 1110
5136 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5137 instr[21] = 1
5138 instr[20,16] = Vm
5139 instr[15,10] = 1011 11
5140 instr[9,5] = Vn
5141 instr[4,0] = V dest. */
5142
5143 FRegister copy_vn;
5144 FRegister copy_vm;
5145 unsigned full = INSTR (30, 30);
5146 unsigned size = INSTR (23, 22);
5147 unsigned vm = INSTR (20, 16);
5148 unsigned vn = INSTR (9, 5);
5149 unsigned vd = INSTR (4, 0);
5150 unsigned i, range;
5151
5152 NYI_assert (29, 24, 0x0E);
5153 NYI_assert (21, 21, 1);
5154 NYI_assert (15, 10, 0x2F);
5155
5156 /* Make copies of the source registers in case vd == vn/vm. */
5157 copy_vn = cpu->fr[vn];
5158 copy_vm = cpu->fr[vm];
5159
5160 switch (size)
5161 {
5162 case 0:
5163 range = full ? 8 : 4;
5164 for (i = 0; i < range; i++)
5165 {
5166 aarch64_set_vec_u8 (cpu, vd, i,
5167 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5168 aarch64_set_vec_u8 (cpu, vd, i + range,
5169 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5170 }
5171 return;
5172
5173 case 1:
5174 range = full ? 4 : 2;
5175 for (i = 0; i < range; i++)
5176 {
5177 aarch64_set_vec_u16 (cpu, vd, i,
5178 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5179 aarch64_set_vec_u16 (cpu, vd, i + range,
5180 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5181 }
5182 return;
5183
5184 case 2:
5185 range = full ? 2 : 1;
5186 for (i = 0; i < range; i++)
5187 {
5188 aarch64_set_vec_u32 (cpu, vd, i,
5189 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5190 aarch64_set_vec_u32 (cpu, vd, i + range,
5191 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5192 }
5193 return;
5194
5195 case 3:
5196 if (! full)
5197 HALT_UNALLOC;
5198 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5199 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5200 return;
5201 }
5202 }
5203
5204 static void
5205 do_vec_UMOV (sim_cpu *cpu)
5206 {
5207 /* instr[31] = 0
5208 instr[30] = 32-bit(0)/64-bit(1)
5209 instr[29,21] = 00 1110 000
5210 instr[20,16] = size & index
5211 instr[15,10] = 0011 11
5212 instr[9,5] = V source
5213 instr[4,0] = R dest. */
5214
5215 unsigned vs = INSTR (9, 5);
5216 unsigned rd = INSTR (4, 0);
5217 unsigned index;
5218
5219 NYI_assert (29, 21, 0x070);
5220 NYI_assert (15, 10, 0x0F);
5221
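/* The imm5 field (instr[20,16]) encodes both size and index: the
   position of its lowest set bit selects the element size, and the
   bits above it form the element index.  */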
5222 if (INSTR (16, 16))
5223 {
5224 /* Byte transfer. */
5225 index = INSTR (20, 17);
5226 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5227 aarch64_get_vec_u8 (cpu, vs, index));
5228 }
5229 else if (INSTR (17, 17))
5230 {
5231 index = INSTR (20, 18);
5232 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5233 aarch64_get_vec_u16 (cpu, vs, index));
5234 }
5235 else if (INSTR (18, 18))
5236 {
5237 index = INSTR (20, 19);
5238 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5239 aarch64_get_vec_u32 (cpu, vs, index));
5240 }
5241 else
5242 {
5243 if (INSTR (30, 30) != 1)
5244 HALT_UNALLOC;
5245
5246 index = INSTR (20, 20);
5247 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5248 aarch64_get_vec_u64 (cpu, vs, index));
5249 }
5250 }
5251
5252 static void
5253 do_vec_FABS (sim_cpu *cpu)
5254 {
5255 /* instr[31] = 0
5256 instr[30] = half(0)/full(1)
5257 instr[29,23] = 00 1110 1
5258 instr[22] = float(0)/double(1)
5259 instr[21,16] = 10 0000
5260 instr[15,10] = 1111 10
5261 instr[9,5] = Vn
5262 instr[4,0] = Vd. */
5263
5264 unsigned vn = INSTR (9, 5);
5265 unsigned vd = INSTR (4, 0);
5266 unsigned full = INSTR (30, 30);
5267 unsigned i;
5268
5269 NYI_assert (29, 23, 0x1D);
5270 NYI_assert (21, 10, 0x83E);
5271
5272 if (INSTR (22, 22))
5273 {
5274 if (! full)
5275 HALT_UNALLOC;
5276
5277 for (i = 0; i < 2; i++)
5278 aarch64_set_vec_double (cpu, vd, i,
5279 fabs (aarch64_get_vec_double (cpu, vn, i)));
5280 }
5281 else
5282 {
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_float (cpu, vd, i,
5285 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5286 }
5287 }
5288
5289 static void
5290 do_vec_FCVTZS (sim_cpu *cpu)
5291 {
5292 /* instr[31] = 0
5293 instr[30] = half (0) / all (1)
5294 instr[29,23] = 00 1110 1
5295 instr[22] = single (0) / double (1)
5296 instr[21,10] = 10 0001 1011 10
5297 instr[9,5] = Rn
5298 instr[4,0] = Rd. */
5299
5300 unsigned rn = INSTR (9, 5);
5301 unsigned rd = INSTR (4, 0);
5302 unsigned full = INSTR (30, 30);
5303 unsigned i;
5304
5305 NYI_assert (31, 31, 0);
5306 NYI_assert (29, 23, 0x1D);
5307 NYI_assert (21, 10, 0x86E);
5308
5309 if (INSTR (22, 22))
5310 {
5311 if (! full)
5312 HALT_UNALLOC;
5313
5314 for (i = 0; i < 2; i++)
5315 aarch64_set_vec_s64 (cpu, rd, i,
5316 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5317 }
5318 else
5319 for (i = 0; i < (full ? 4 : 2); i++)
5320 aarch64_set_vec_s32 (cpu, rd, i,
5321 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5322 }
5323
5324 static void
5325 do_vec_op1 (sim_cpu *cpu)
5326 {
5327 /* instr[31] = 0
5328 instr[30] = half/full
5329 instr[29,24] = 00 1110
5330 instr[23,21] = ???
5331 instr[20,16] = Vm
5332 instr[15,10] = sub-opcode
5333 instr[9,5] = Vn
5334 instr[4,0] = Vd */
5335 NYI_assert (29, 24, 0x0E);
5336
5337 if (INSTR (21, 21) == 0)
5338 {
5339 if (INSTR (23, 22) == 0)
5340 {
5341 if (INSTR (30, 30) == 1
5342 && INSTR (17, 14) == 0
5343 && INSTR (12, 10) == 7)
5344 return do_vec_ins_2 (cpu);
5345
5346 switch (INSTR (15, 10))
5347 {
5348 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5349 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5350 case 0x07: do_vec_INS (cpu); return;
5351 case 0x0A: do_vec_TRN (cpu); return;
5352
5353 case 0x0F:
5354 if (INSTR (17, 16) == 0)
5355 {
5356 do_vec_MOV_into_scalar (cpu);
5357 return;
5358 }
5359 break;
5360
5361 case 0x00:
5362 case 0x08:
5363 case 0x10:
5364 case 0x18:
5365 do_vec_TBL (cpu); return;
5366
5367 case 0x06:
5368 case 0x16:
5369 do_vec_UZP (cpu); return;
5370
5371 case 0x0E:
5372 case 0x1E:
5373 do_vec_ZIP (cpu); return;
5374
5375 default:
5376 HALT_NYI;
5377 }
5378 }
5379
5380 switch (INSTR (13, 10))
5381 {
5382 case 0x6: do_vec_UZP (cpu); return;
5383 case 0xE: do_vec_ZIP (cpu); return;
5384 case 0xA: do_vec_TRN (cpu); return;
5385 case 0xF: do_vec_UMOV (cpu); return;
5386 default: HALT_NYI;
5387 }
5388 }
5389
5390 switch (INSTR (15, 10))
5391 {
5392 case 0x07:
5393 switch (INSTR (23, 21))
5394 {
5395 case 1: do_vec_AND (cpu); return;
5396 case 3: do_vec_BIC (cpu); return;
5397 case 5: do_vec_ORR (cpu); return;
5398 case 7: do_vec_ORN (cpu); return;
5399 default: HALT_NYI;
5400 }
5401
5402 case 0x08: do_vec_sub_long (cpu); return;
5403 case 0x0a: do_vec_XTN (cpu); return;
5404 case 0x11: do_vec_SSHL (cpu); return;
5405 case 0x19: do_vec_max (cpu); return;
5406 case 0x1B: do_vec_min (cpu); return;
5407 case 0x21: do_vec_add (cpu); return;
5408 case 0x25: do_vec_MLA (cpu); return;
5409 case 0x27: do_vec_mul (cpu); return;
5410 case 0x2F: do_vec_ADDP (cpu); return;
5411 case 0x30: do_vec_mull (cpu); return;
5412 case 0x33: do_vec_FMLA (cpu); return;
5413 case 0x35: do_vec_fadd (cpu); return;
5414
5415 case 0x2E:
5416 switch (INSTR (20, 16))
5417 {
5418 case 0x00: do_vec_ABS (cpu); return;
5419 case 0x01: do_vec_FCVTZS (cpu); return;
5420 case 0x11: do_vec_ADDV (cpu); return;
5421 default: HALT_NYI;
5422 }
5423
5424 case 0x31:
5425 case 0x3B:
5426 do_vec_Fminmax (cpu); return;
5427
5428 case 0x0D:
5429 case 0x0F:
5430 case 0x22:
5431 case 0x23:
5432 case 0x26:
5433 case 0x2A:
5434 case 0x32:
5435 case 0x36:
5436 case 0x39:
5437 case 0x3A:
5438 do_vec_compare (cpu); return;
5439
5440 case 0x3E:
5441 do_vec_FABS (cpu); return;
5442
5443 default:
5444 HALT_NYI;
5445 }
5446 }
5447
5448 static void
5449 do_vec_xtl (sim_cpu *cpu)
5450 {
5451 /* instr[31] = 0
5452 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5453 instr[28,22] = 0 1111 00
5454 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5455 instr[15,10] = 1010 01
5456 instr[9,5] = V source
5457 instr[4,0] = V dest. */
5458
5459 unsigned vs = INSTR (9, 5);
5460 unsigned vd = INSTR (4, 0);
5461 unsigned i, shift, bias = 0;
5462
5463 NYI_assert (28, 22, 0x3C);
5464 NYI_assert (15, 10, 0x29);
5465
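/* BIAS selects the upper half of the source vector for the "2"
   variants; it is scaled below into the index of the first source
   element for each element width.  */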
5466 switch (INSTR (30, 29))
5467 {
5468 case 2: /* SXTL2, SSHLL2. */
5469 bias = 2;
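/* Fall through.  */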
5470 case 0: /* SXTL, SSHLL. */
5471 if (INSTR (21, 21))
5472 {
5473 int64_t val1, val2;
5474
5475 shift = INSTR (20, 16);
5476 /* Get the source values before setting the destination values
5477 in case the source and destination are the same. */
5478 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5479 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5480 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5481 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5482 }
5483 else if (INSTR (20, 20))
5484 {
5485 int32_t v[4];
5487
5488 shift = INSTR (19, 16);
5489 bias *= 2;
5490 for (i = 0; i < 4; i++)
5491 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5492 for (i = 0; i < 4; i++)
5493 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5494 }
5495 else
5496 {
5497 int16_t v[8];
5498 NYI_assert (19, 19, 1);
5499
5500 shift = INSTR (18, 16);
5501 bias *= 4;
5502 for (i = 0; i < 8; i++)
5503 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5504 for (i = 0; i < 8; i++)
5505 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5506 }
5507 return;
5508
5509 case 3: /* UXTL2, USHLL2. */
5510 bias = 2;
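/* Fall through.  */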
5511 case 1: /* UXTL, USHLL. */
5512 if (INSTR (21, 21))
5513 {
5514 uint64_t v1, v2;
5515 shift = INSTR (20, 16);
5516 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5517 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5518 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5519 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5520 }
5521 else if (INSTR (20, 20))
5522 {
5523 uint32_t v[4];
5524 shift = INSTR (19, 16);
5525 bias *= 2;
5526 for (i = 0; i < 4; i++)
5527 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5528 for (i = 0; i < 4; i++)
5529 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5530 }
5531 else
5532 {
5533 uint16_t v[8];
5534 NYI_assert (19, 19, 1);
5535
5536 shift = INSTR (18, 16);
5537 bias *= 4;
5538 for (i = 0; i < 8; i++)
5539 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5540 for (i = 0; i < 8; i++)
5541 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5542 }
5543 return;
5544 }
5545 }
5546
5547 static void
5548 do_vec_SHL (sim_cpu *cpu)
5549 {
5550 /* instr [31] = 0
5551 instr [30] = half(0)/full(1)
5552 instr [29,23] = 001 1110
5553 instr [22,16] = size and shift amount
5554 instr [15,10] = 01 0101
5555 instr [9, 5] = Vs
5556 instr [4, 0] = Vd. */
5557
5558 int shift;
5559 int full = INSTR (30, 30);
5560 unsigned vs = INSTR (9, 5);
5561 unsigned vd = INSTR (4, 0);
5562 unsigned i;
5563
5564 NYI_assert (29, 23, 0x1E);
5565 NYI_assert (15, 10, 0x15);
5566
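/* instr[22,16] is immh:immb; the position of the highest set bit of
   immh selects the element size, and the bits below it give the
   left-shift count directly.  */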
5567 if (INSTR (22, 22))
5568 {
5569 shift = INSTR (21, 16);
5570
5571 if (full == 0)
5572 HALT_UNALLOC;
5573
5574 for (i = 0; i < 2; i++)
5575 {
5576 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5577 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5578 }
5579
5580 return;
5581 }
5582
5583 if (INSTR (21, 21))
5584 {
5585 shift = INSTR (20, 16);
5586
5587 for (i = 0; i < (full ? 4 : 2); i++)
5588 {
5589 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5590 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5591 }
5592
5593 return;
5594 }
5595
5596 if (INSTR (20, 20))
5597 {
5598 shift = INSTR (19, 16);
5599
5600 for (i = 0; i < (full ? 8 : 4); i++)
5601 {
5602 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5603 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5604 }
5605
5606 return;
5607 }
5608
5609 if (INSTR (19, 19) == 0)
5610 HALT_UNALLOC;
5611
5612 shift = INSTR (18, 16);
5613
5614 for (i = 0; i < (full ? 16 : 8); i++)
5615 {
5616 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5617 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5618 }
5619 }
5620
5621 static void
5622 do_vec_SSHR_USHR (sim_cpu *cpu)
5623 {
5624 /* instr [31] = 0
5625 instr [30] = half(0)/full(1)
5626 instr [29] = signed(0)/unsigned(1)
5627 instr [28,23] = 0 1111 0
5628 instr [22,16] = size and shift amount
5629 instr [15,10] = 0000 01
5630 instr [9, 5] = Vs
5631 instr [4, 0] = Vd. */
5632
5633 int full = INSTR (30, 30);
5634 int sign = ! INSTR (29, 29);
5635 unsigned shift = INSTR (22, 16);
5636 unsigned vs = INSTR (9, 5);
5637 unsigned vd = INSTR (4, 0);
5638 unsigned i;
5639
5640 NYI_assert (28, 23, 0x1E);
5641 NYI_assert (15, 10, 0x01);
5642
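/* For right shifts the immediate encodes (2 * element-size) minus
   the shift amount, so the real count is recovered below as
   128/64/32/16 minus the raw field.  */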
5643 if (INSTR (22, 22))
5644 {
5645 shift = 128 - shift;
5646
5647 if (full == 0)
5648 HALT_UNALLOC;
5649
5650 if (sign)
5651 for (i = 0; i < 2; i++)
5652 {
5653 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5654 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5655 }
5656 else
5657 for (i = 0; i < 2; i++)
5658 {
5659 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5660 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5661 }
5662
5663 return;
5664 }
5665
5666 if (INSTR (21, 21))
5667 {
5668 shift = 64 - shift;
5669
5670 if (sign)
5671 for (i = 0; i < (full ? 4 : 2); i++)
5672 {
5673 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5674 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5675 }
5676 else
5677 for (i = 0; i < (full ? 4 : 2); i++)
5678 {
5679 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5680 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5681 }
5682
5683 return;
5684 }
5685
5686 if (INSTR (20, 20))
5687 {
5688 shift = 32 - shift;
5689
5690 if (sign)
5691 for (i = 0; i < (full ? 8 : 4); i++)
5692 {
5693 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5694 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5695 }
5696 else
5697 for (i = 0; i < (full ? 8 : 4); i++)
5698 {
5699 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5700 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5701 }
5702
5703 return;
5704 }
5705
5706 if (INSTR (19, 19) == 0)
5707 HALT_UNALLOC;
5708
5709 shift = 16 - shift;
5710
5711 if (sign)
5712 for (i = 0; i < (full ? 16 : 8); i++)
5713 {
5714 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5715 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5716 }
5717 else
5718 for (i = 0; i < (full ? 16 : 8); i++)
5719 {
5720 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5721 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5722 }
5723 }
5724
5725 static void
5726 do_vec_MUL_by_element (sim_cpu *cpu)
5727 {
5728 /* instr[31] = 0
5729 instr[30] = half/full
5730 instr[29,24] = 00 1111
5731 instr[23,22] = size
5732 instr[21] = L
5733 instr[20] = M
5734 instr[19,16] = m
5735 instr[15,12] = 1000
5736 instr[11] = H
5737 instr[10] = 0
5738 instr[9,5] = Vn
5739 instr[4,0] = Vd */
5740
5741 unsigned full = INSTR (30, 30);
5742 unsigned L = INSTR (21, 21);
5743 unsigned H = INSTR (11, 11);
5744 unsigned vn = INSTR (9, 5);
5745 unsigned vd = INSTR (4, 0);
5746 unsigned size = INSTR (23, 22);
5747 unsigned index;
5748 unsigned vm;
5749 unsigned e;
5750
5751 NYI_assert (29, 24, 0x0F);
5752 NYI_assert (15, 12, 0x8);
5753 NYI_assert (10, 10, 0);
5754
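/* The element index is H:L:M for 16-bit lanes, leaving only four
   bits for the register number, and H:L for 32-bit lanes, where M
   becomes the top bit of the register number instead.  */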
5755 switch (size)
5756 {
5757 case 1:
5758 {
5759 /* 16 bit products. */
5760 uint16_t product;
5761 uint16_t element1;
5762 uint16_t element2;
5763
5764 index = (H << 2) | (L << 1) | INSTR (20, 20);
5765 vm = INSTR (19, 16);
5766 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5767
5768 for (e = 0; e < (full ? 8 : 4); e ++)
5769 {
5770 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5771 product = element1 * element2;
5772 aarch64_set_vec_u16 (cpu, vd, e, product);
5773 }
5774 }
5775 break;
5776
5777 case 2:
5778 {
5779 /* 32 bit products. */
5780 uint32_t product;
5781 uint32_t element1;
5782 uint32_t element2;
5783
5784 index = (H << 1) | L;
5785 vm = INSTR (20, 16);
5786 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5787
5788 for (e = 0; e < (full ? 4 : 2); e ++)
5789 {
5790 element1 = aarch64_get_vec_u32 (cpu, vn, e);
5791 product = element1 * element2;
5792 aarch64_set_vec_u32 (cpu, vd, e, product);
5793 }
5794 }
5795 break;
5796
5797 default:
5798 HALT_UNALLOC;
5799 }
5800 }
5801
5802 static void
5803 do_vec_op2 (sim_cpu *cpu)
5804 {
5805 /* instr[31] = 0
5806 instr[30] = half/full
5807 instr[29,24] = 00 1111
5808 instr[23] = ?
5809 instr[22,16] = element size & index
5810 instr[15,10] = sub-opcode
5811 instr[9,5] = Vm
5812 instr[4,0] = Vd */
5813
5814 NYI_assert (29, 24, 0x0F);
5815
5816 if (INSTR (23, 23) != 0)
5817 {
5818 switch (INSTR (15, 10))
5819 {
5820 case 0x20:
5821 case 0x22: do_vec_MUL_by_element (cpu); return;
5822 default: HALT_NYI;
5823 }
5824 }
5825 else
5826 {
5827 switch (INSTR (15, 10))
5828 {
5829 case 0x01: do_vec_SSHR_USHR (cpu); return;
5830 case 0x15: do_vec_SHL (cpu); return;
5831 case 0x20:
5832 case 0x22: do_vec_MUL_by_element (cpu); return;
5833 case 0x29: do_vec_xtl (cpu); return;
5834 default: HALT_NYI;
5835 }
5836 }
5837 }
5838
5839 static void
5840 do_vec_neg (sim_cpu *cpu)
5841 {
5842 /* instr[31] = 0
5843 instr[30] = full(1)/half(0)
5844 instr[29,24] = 10 1110
5845 instr[23,22] = size: byte(00), half (01), word (10), long (11)
5846 instr[21,10] = 1000 0010 1110
5847 instr[9,5] = Vs
5848 instr[4,0] = Vd */
5849
5850 int full = INSTR (30, 30);
5851 unsigned vs = INSTR (9, 5);
5852 unsigned vd = INSTR (4, 0);
5853 unsigned i;
5854
5855 NYI_assert (29, 24, 0x2E);
5856 NYI_assert (21, 10, 0x82E);
5857
5858 switch (INSTR (23, 22))
5859 {
5860 case 0:
5861 for (i = 0; i < (full ? 16 : 8); i++)
5862 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
5863 return;
5864
5865 case 1:
5866 for (i = 0; i < (full ? 8 : 4); i++)
5867 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
5868 return;
5869
5870 case 2:
5871 for (i = 0; i < (full ? 4 : 2); i++)
5872 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
5873 return;
5874
5875 case 3:
5876 if (! full)
5877 HALT_UNALLOC;
5878 for (i = 0; i < 2; i++)
5879 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
5880 return;
5881 }
5882 }
5883
5884 static void
5885 do_vec_sqrt (sim_cpu *cpu)
5886 {
5887 /* instr[31] = 0
5888 instr[30] = full(1)/half(0)
5889 instr[29,23] = 101 1101
5890 instr[22] = single(0)/double(1)
5891 instr[21,10] = 1000 0111 1110
5892 instr[9,5] = Vs
5893 instr[4,0] = Vd. */
5894
5895 int full = INSTR (30, 30);
5896 unsigned vs = INSTR (9, 5);
5897 unsigned vd = INSTR (4, 0);
5898 unsigned i;
5899
5900 NYI_assert (29, 23, 0x5B);
5901 NYI_assert (21, 10, 0x87E);
5902
5903 if (INSTR (22, 22) == 0)
5904 for (i = 0; i < (full ? 4 : 2); i++)
5905 aarch64_set_vec_float (cpu, vd, i,
5906 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
5907 else
5908 for (i = 0; i < 2; i++)
5909 aarch64_set_vec_double (cpu, vd, i,
5910 sqrt (aarch64_get_vec_double (cpu, vs, i)));
5911 }
5912
5913 static void
5914 do_vec_mls_indexed (sim_cpu *cpu)
5915 {
5916 /* instr[31] = 0
5917 instr[30] = half(0)/full(1)
5918 instr[29,24] = 10 1111
5919 instr[23,22] = 16-bit(01)/32-bit(10)
5920 instr[11],instr[21,20] = index H:L:M (if 16-bit)
5921 instr[11],instr[21] = index H:L (if 32-bit)
5922 instr[20,16] = Vm
5923 instr[15,12] = 0100
5924 instr[11] = part of index
5925 instr[10] = 0
5926 instr[9,5] = Vs
5927 instr[4,0] = Vd. */
5928
5929 int full = INSTR (30, 30);
5930 unsigned vs = INSTR (9, 5);
5931 unsigned vd = INSTR (4, 0);
5932 unsigned vm = INSTR (20, 16);
5933 unsigned i;
5934
5935 NYI_assert (15, 12, 4);
5936 NYI_assert (10, 10, 0);
5937
5938 switch (INSTR (23, 22))
5939 {
5940 case 1:
5941 {
5942 unsigned elem;
5943 uint32_t val;
5944
5945 if (vm > 15)
5946 HALT_NYI;
5947
5948 elem = (INSTR (11, 11) << 2) | INSTR (21, 20);
5949 val = aarch64_get_vec_u16 (cpu, vm, elem);
5950
5951 for (i = 0; i < (full ? 8 : 4); i++)
5952 aarch64_set_vec_u16 (cpu, vd, i,
5953 aarch64_get_vec_u16 (cpu, vd, i) -
5954 (aarch64_get_vec_u16 (cpu, vs, i) * val));
5955 return;
5956 }
5957
5958 case 2:
5959 {
5960 unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
5961 uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
5962
5963 for (i = 0; i < (full ? 4 : 2); i++)
5964 aarch64_set_vec_u32 (cpu, vd, i,
5965 aarch64_get_vec_u32 (cpu, vd, i) -
5966 (aarch64_get_vec_u32 (cpu, vs, i) * val));
5967 return;
5968 }
5969
5970 case 0:
5971 case 3:
5972 default:
5973 HALT_NYI;
5974 }
5975 }
5976
5977 static void
5978 do_vec_SUB (sim_cpu *cpu)
5979 {
5980 /* instr [31] = 0
5981 instr [30] = half(0)/full(1)
5982 instr [29,24] = 10 1110
5983 instr [23,22] = size: byte(00), half(01), word (10), long (11)
5984 instr [21] = 1
5985 instr [20,16] = Vm
5986 instr [15,10] = 10 0001
5987 instr [9, 5] = Vn
5988 instr [4, 0] = Vd. */
5989
5990 unsigned full = INSTR (30, 30);
5991 unsigned vm = INSTR (20, 16);
5992 unsigned vn = INSTR (9, 5);
5993 unsigned vd = INSTR (4, 0);
5994 unsigned i;
5995
5996 NYI_assert (29, 24, 0x2E);
5997 NYI_assert (21, 21, 1);
5998 NYI_assert (15, 10, 0x21);
5999
6000 switch (INSTR (23, 22))
6001 {
6002 case 0:
6003 for (i = 0; i < (full ? 16 : 8); i++)
6004 aarch64_set_vec_s8 (cpu, vd, i,
6005 aarch64_get_vec_s8 (cpu, vn, i)
6006 - aarch64_get_vec_s8 (cpu, vm, i));
6007 return;
6008
6009 case 1:
6010 for (i = 0; i < (full ? 8 : 4); i++)
6011 aarch64_set_vec_s16 (cpu, vd, i,
6012 aarch64_get_vec_s16 (cpu, vn, i)
6013 - aarch64_get_vec_s16 (cpu, vm, i));
6014 return;
6015
6016 case 2:
6017 for (i = 0; i < (full ? 4 : 2); i++)
6018 aarch64_set_vec_s32 (cpu, vd, i,
6019 aarch64_get_vec_s32 (cpu, vn, i)
6020 - aarch64_get_vec_s32 (cpu, vm, i));
6021 return;
6022
6023 case 3:
6024 if (full == 0)
6025 HALT_UNALLOC;
6026
6027 for (i = 0; i < 2; i++)
6028 aarch64_set_vec_s64 (cpu, vd, i,
6029 aarch64_get_vec_s64 (cpu, vn, i)
6030 - aarch64_get_vec_s64 (cpu, vm, i));
6031 return;
6032 }
6033 }
6034
6035 static void
6036 do_vec_MLS (sim_cpu *cpu)
6037 {
6038 /* instr [31] = 0
6039 instr [30] = half(0)/full(1)
6040 instr [29,24] = 10 1110
6041 instr [23,22] = size: byte(00), half(01), word (10)
6042 instr [21] = 1
6043 instr [20,16] = Vm
6044 instr [15,10] = 10 0101
6045 instr [9, 5] = Vn
6046 instr [4, 0] = Vd. */
6047
6048 unsigned full = INSTR (30, 30);
6049 unsigned vm = INSTR (20, 16);
6050 unsigned vn = INSTR (9, 5);
6051 unsigned vd = INSTR (4, 0);
6052 unsigned i;
6053
6054 NYI_assert (29, 24, 0x2E);
6055 NYI_assert (21, 21, 1);
6056 NYI_assert (15, 10, 0x25);
6057
6058 switch (INSTR (23, 22))
6059 {
6060 case 0:
6061 for (i = 0; i < (full ? 16 : 8); i++)
6062 aarch64_set_vec_u8 (cpu, vd, i,
6063 aarch64_get_vec_u8 (cpu, vd, i)
6064 - (aarch64_get_vec_u8 (cpu, vn, i)
6065 * aarch64_get_vec_u8 (cpu, vm, i)));
6066 return;
6067
6068 case 1:
6069 for (i = 0; i < (full ? 8 : 4); i++)
6070 aarch64_set_vec_u16 (cpu, vd, i,
6071 aarch64_get_vec_u16 (cpu, vd, i)
6072 - (aarch64_get_vec_u16 (cpu, vn, i)
6073 * aarch64_get_vec_u16 (cpu, vm, i)));
6074 return;
6075
6076 case 2:
6077 for (i = 0; i < (full ? 4 : 2); i++)
6078 aarch64_set_vec_u32 (cpu, vd, i,
6079 aarch64_get_vec_u32 (cpu, vd, i)
6080 - (aarch64_get_vec_u32 (cpu, vn, i)
6081 * aarch64_get_vec_u32 (cpu, vm, i)));
6082 return;
6083
6084 default:
6085 HALT_UNALLOC;
6086 }
6087 }
6088
6089 static void
6090 do_vec_FDIV (sim_cpu *cpu)
6091 {
6092 /* instr [31] = 0
6093 instr [30] = half(0)/full(1)
6094 instr [29,23] = 10 1110 0
6095 instr [22] = float(0)/double(1)
6096 instr [21] = 1
6097 instr [20,16] = Vm
6098 instr [15,10] = 1111 11
6099 instr [9, 5] = Vn
6100 instr [4, 0] = Vd. */
6101
6102 unsigned full = INSTR (30, 30);
6103 unsigned vm = INSTR (20, 16);
6104 unsigned vn = INSTR (9, 5);
6105 unsigned vd = INSTR (4, 0);
6106 unsigned i;
6107
6108 NYI_assert (29, 23, 0x5C);
6109 NYI_assert (21, 21, 1);
6110 NYI_assert (15, 10, 0x3F);
6111
6112 if (INSTR (22, 22))
6113 {
6114 if (! full)
6115 HALT_UNALLOC;
6116
6117 for (i = 0; i < 2; i++)
6118 aarch64_set_vec_double (cpu, vd, i,
6119 aarch64_get_vec_double (cpu, vn, i)
6120 / aarch64_get_vec_double (cpu, vm, i));
6121 }
6122 else
6123 for (i = 0; i < (full ? 4 : 2); i++)
6124 aarch64_set_vec_float (cpu, vd, i,
6125 aarch64_get_vec_float (cpu, vn, i)
6126 / aarch64_get_vec_float (cpu, vm, i));
6127 }
6128
6129 static void
6130 do_vec_FMUL (sim_cpu *cpu)
6131 {
6132 /* instr [31] = 0
6133 instr [30] = half(0)/full(1)
6134 instr [29,23] = 10 1110 0
6135 instr [22] = float(0)/double(1)
6136 instr [21] = 1
6137 instr [20,16] = Vm
6138 instr [15,10] = 1101 11
6139 instr [9, 5] = Vn
6140 instr [4, 0] = Vd. */
6141
6142 unsigned full = INSTR (30, 30);
6143 unsigned vm = INSTR (20, 16);
6144 unsigned vn = INSTR (9, 5);
6145 unsigned vd = INSTR (4, 0);
6146 unsigned i;
6147
6148 NYI_assert (29, 23, 0x5C);
6149 NYI_assert (21, 21, 1);
6150 NYI_assert (15, 10, 0x37);
6151
6152 if (INSTR (22, 22))
6153 {
6154 if (! full)
6155 HALT_UNALLOC;
6156
6157 for (i = 0; i < 2; i++)
6158 aarch64_set_vec_double (cpu, vd, i,
6159 aarch64_get_vec_double (cpu, vn, i)
6160 * aarch64_get_vec_double (cpu, vm, i));
6161 }
6162 else
6163 for (i = 0; i < (full ? 4 : 2); i++)
6164 aarch64_set_vec_float (cpu, vd, i,
6165 aarch64_get_vec_float (cpu, vn, i)
6166 * aarch64_get_vec_float (cpu, vm, i));
6167 }
6168
6169 static void
6170 do_vec_FADDP (sim_cpu *cpu)
6171 {
6172 /* instr [31] = 0
6173 instr [30] = half(0)/full(1)
6174 instr [29,23] = 10 1110 0
6175 instr [22] = float(0)/double(1)
6176 instr [21] = 1
6177 instr [20,16] = Vm
6178 instr [15,10] = 1101 01
6179 instr [9, 5] = Vn
6180 instr [4, 0] = Vd. */
6181
6182 unsigned full = INSTR (30, 30);
6183 unsigned vm = INSTR (20, 16);
6184 unsigned vn = INSTR (9, 5);
6185 unsigned vd = INSTR (4, 0);
6186
6187 NYI_assert (29, 23, 0x5C);
6188 NYI_assert (21, 21, 1);
6189 NYI_assert (15, 10, 0x35);
6190
6191 if (INSTR (22, 22))
6192 {
6193 /* Extract values before adding them in case vd == vn/vm. */
6194 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6195 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6196 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6197 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6198
6199 if (! full)
6200 HALT_UNALLOC;
6201
6202 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6203 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6204 }
6205 else
6206 {
6207 /* Extract values before adding them in case vd == vn/vm. */
6208 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6209 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6210 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6211 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6212
6213 if (full)
6214 {
6215 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6216 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6217 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6218 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6219
6220 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6221 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6222 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6223 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6224 }
6225 else
6226 {
6227 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6228 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6229 }
6230 }
6231 }
6232
6233 static void
6234 do_vec_FSQRT (sim_cpu *cpu)
6235 {
6236 /* instr[31] = 0
6237 instr[30] = half(0)/full(1)
6238 instr[29,23] = 10 1110 1
6239 instr[22] = single(0)/double(1)
6240 instr[21,10] = 10 0001 1111 10
6241 instr[9,5] = Vsrc
6242 instr[4,0] = Vdest. */
6243
6244 unsigned vn = INSTR (9, 5);
6245 unsigned vd = INSTR (4, 0);
6246 unsigned full = INSTR (30, 30);
6247 int i;
6248
6249 NYI_assert (29, 23, 0x5D);
6250 NYI_assert (21, 10, 0x87E);
6251
6252 if (INSTR (22, 22))
6253 {
6254 if (! full)
6255 HALT_UNALLOC;
6256
6257 for (i = 0; i < 2; i++)
6258 aarch64_set_vec_double (cpu, vd, i,
6259 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6260 }
6261 else
6262 {
6263 for (i = 0; i < (full ? 4 : 2); i++)
6264 aarch64_set_vec_float (cpu, vd, i,
6265 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6266 }
6267 }
6268
6269 static void
6270 do_vec_FNEG (sim_cpu *cpu)
6271 {
6272 /* instr[31] = 0
6273 instr[30] = half (0)/full (1)
6274 instr[29,23] = 10 1110 1
6275 instr[22] = single (0)/double (1)
6276 instr[21,10] = 10 0000 1111 10
6277 instr[9,5] = Vsrc
6278 instr[4,0] = Vdest. */
6279
6280 unsigned vn = INSTR (9, 5);
6281 unsigned vd = INSTR (4, 0);
6282 unsigned full = INSTR (30, 30);
6283 int i;
6284
6285 NYI_assert (29, 23, 0x5D);
6286 NYI_assert (21, 10, 0x83E);
6287
6288 if (INSTR (22, 22))
6289 {
6290 if (! full)
6291 HALT_UNALLOC;
6292
6293 for (i = 0; i < 2; i++)
6294 aarch64_set_vec_double (cpu, vd, i,
6295 - aarch64_get_vec_double (cpu, vn, i));
6296 }
6297 else
6298 {
6299 for (i = 0; i < (full ? 4 : 2); i++)
6300 aarch64_set_vec_float (cpu, vd, i,
6301 - aarch64_get_vec_float (cpu, vn, i));
6302 }
6303 }
6304
6305 static void
6306 do_vec_NOT (sim_cpu *cpu)
6307 {
6308 /* instr[31] = 0
6309 instr[30] = half (0)/full (1)
6310 instr[29,10] = 10 1110 0010 0000 0101 10
6311 instr[9,5] = Vn
6312 instr[4,0] = Vd. */
6313
6314 unsigned vn = INSTR (9, 5);
6315 unsigned vd = INSTR (4, 0);
6316 unsigned i;
6317 int full = INSTR (30, 30);
6318
6319 NYI_assert (29, 10, 0xB8816);
6320
6321 for (i = 0; i < (full ? 16 : 8); i++)
6322 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6323 }
6324
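/* Return the number of leading zero bits in VAL, treated as a
   SIZE-bit quantity; returns SIZE when VAL is zero.  For non-zero
   values this is equivalent to __builtin_clzll (val) - (64 - size),
   where that builtin is available.  */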
6325 static unsigned int
6326 clz (uint64_t val, unsigned size)
6327 {
6328 uint64_t mask = 1;
6329 int count;
6330
6331 mask <<= (size - 1);
6332 count = 0;
6333 do
6334 {
6335 if (val & mask)
6336 break;
6337 mask >>= 1;
6338 count ++;
6339 }
6340 while (mask);
6341
6342 return count;
6343 }
6344
6345 static void
6346 do_vec_CLZ (sim_cpu *cpu)
6347 {
6348 /* instr[31] = 0
6349 instr[30] = half (0)/full (1)
6350 instr[29,24] = 10 1110
6351 instr[23,22] = size
6352 instr[21,10] = 10 0000 0100 10
6353 instr[9,5] = Vn
6354 instr[4,0] = Vd. */
6355
6356 unsigned vn = INSTR (9, 5);
6357 unsigned vd = INSTR (4, 0);
6358 unsigned i;
6359 int full = INSTR (30,30);
6360
6361 NYI_assert (29, 24, 0x2E);
6362 NYI_assert (21, 10, 0x812);
6363
6364 switch (INSTR (23, 22))
6365 {
6366 case 0:
6367 for (i = 0; i < (full ? 16 : 8); i++)
6368 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6369 break;
6370 case 1:
6371 for (i = 0; i < (full ? 8 : 4); i++)
6372 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6373 break;
6374 case 2:
6375 for (i = 0; i < (full ? 4 : 2); i++)
6376 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6377 break;
6378 case 3:
6379 if (! full)
6380 HALT_UNALLOC;
6381 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6382 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6383 break;
6384 }
6385 }
6386
6387 static void
6388 do_vec_MOV_element (sim_cpu *cpu)
6389 {
6390 /* instr[31,21] = 0110 1110 000
6391 instr[20,16] = size & dest index
6392 instr[15] = 0
6393 instr[14,11] = source index
6394 instr[10] = 1
6395 instr[9,5] = Vs
6396 instr[4,0] = Vd. */
6397
6398 unsigned vs = INSTR (9, 5);
6399 unsigned vd = INSTR (4, 0);
6400 unsigned src_index;
6401 unsigned dst_index;
6402
6403 NYI_assert (31, 21, 0x370);
6404 NYI_assert (15, 15, 0);
6405 NYI_assert (10, 10, 1);
6406
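/* As in do_vec_UMOV, the position of the lowest set bit of
   instr[20,16] selects the element size; the bits above it give the
   destination index and instr[14,11] supplies the source index.  */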
6407 if (INSTR (16, 16))
6408 {
6409 /* Move a byte. */
6410 src_index = INSTR (14, 11);
6411 dst_index = INSTR (20, 17);
6412 aarch64_set_vec_u8 (cpu, vd, dst_index,
6413 aarch64_get_vec_u8 (cpu, vs, src_index));
6414 }
6415 else if (INSTR (17, 17))
6416 {
6417 /* Move 16-bits. */
6418 NYI_assert (11, 11, 0);
6419 src_index = INSTR (14, 12);
6420 dst_index = INSTR (20, 18);
6421 aarch64_set_vec_u16 (cpu, vd, dst_index,
6422 aarch64_get_vec_u16 (cpu, vs, src_index));
6423 }
6424 else if (INSTR (18, 18))
6425 {
6426 /* Move 32-bits. */
6427 NYI_assert (12, 11, 0);
6428 src_index = INSTR (14, 13);
6429 dst_index = INSTR (20, 19);
6430 aarch64_set_vec_u32 (cpu, vd, dst_index,
6431 aarch64_get_vec_u32 (cpu, vs, src_index));
6432 }
6433 else
6434 {
6435 NYI_assert (19, 19, 1);
6436 NYI_assert (13, 11, 0);
6437 src_index = INSTR (14, 14);
6438 dst_index = INSTR (20, 20);
6439 aarch64_set_vec_u64 (cpu, vd, dst_index,
6440 aarch64_get_vec_u64 (cpu, vs, src_index));
6441 }
6442 }
6443
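/* Worked example for the decode above (illustrative): INS V2.S[3],
   V7.S[1] sets imm5 (instr[20,16]) to 11100 -- bit 18 is the lowest
   set bit, selecting 32-bit elements -- so dst_index = instr[20,19]
   = 3 and src_index = instr[14,13] = 1.  */
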
6444 static void
6445 dexAdvSIMD0 (sim_cpu *cpu)
6446 {
6447 /* instr [28,25] = 0 111. */
6448 if (INSTR (15, 10) == 0x07
6449     && INSTR (9, 5) == INSTR (20, 16))
6451 {
6452 if (INSTR (31, 21) == 0x075
6453 || INSTR (31, 21) == 0x275)
6454 {
6455 do_vec_MOV_whole_vector (cpu);
6456 return;
6457 }
6458 }
6459
6460 if (INSTR (29, 19) == 0x1E0)
6461 {
6462 do_vec_MOV_immediate (cpu);
6463 return;
6464 }
6465
6466 if (INSTR (29, 19) == 0x5E0)
6467 {
6468 do_vec_MVNI (cpu);
6469 return;
6470 }
6471
6472 if (INSTR (29, 19) == 0x1C0
6473 || INSTR (29, 19) == 0x1C1)
6474 {
6475 if (INSTR (15, 10) == 0x03)
6476 {
6477 do_vec_DUP_scalar_into_vector (cpu);
6478 return;
6479 }
6480 }
6481
6482 switch (INSTR (29, 24))
6483 {
6484 case 0x0E: do_vec_op1 (cpu); return;
6485 case 0x0F: do_vec_op2 (cpu); return;
6486
6487 case 0x2F:
6488 switch (INSTR (15, 10))
6489 {
6490 case 0x01: do_vec_SSHR_USHR (cpu); return;
6491 case 0x10:
6492 case 0x12: do_vec_mls_indexed (cpu); return;
6493 case 0x29: do_vec_xtl (cpu); return;
6494 default:
6495 HALT_NYI;
6496 }
6497
6498 case 0x2E:
6499 if (INSTR (21, 21) == 1)
6500 {
6501 switch (INSTR (15, 10))
6502 {
6503 case 0x07:
6504 switch (INSTR (23, 22))
6505 {
6506 case 0: do_vec_EOR (cpu); return;
6507 case 1: do_vec_BSL (cpu); return;
6508 case 2:
6509 case 3: do_vec_bit (cpu); return;
6510 }
6511 break;
6512
6513 case 0x08: do_vec_sub_long (cpu); return;
6514 case 0x11: do_vec_USHL (cpu); return;
6515 case 0x12: do_vec_CLZ (cpu); return;
6516 case 0x16: do_vec_NOT (cpu); return;
6517 case 0x19: do_vec_max (cpu); return;
6518 case 0x1B: do_vec_min (cpu); return;
6519 case 0x21: do_vec_SUB (cpu); return;
6520 case 0x25: do_vec_MLS (cpu); return;
6521 case 0x31: do_vec_FminmaxNMP (cpu); return;
6522 case 0x35: do_vec_FADDP (cpu); return;
6523 case 0x37: do_vec_FMUL (cpu); return;
6524 case 0x3F: do_vec_FDIV (cpu); return;
6525
6526 case 0x3E:
6527 switch (INSTR (20, 16))
6528 {
6529 case 0x00: do_vec_FNEG (cpu); return;
6530 case 0x01: do_vec_FSQRT (cpu); return;
6531 default: HALT_NYI;
6532 }
6533
6534 case 0x0D:
6535 case 0x0F:
6536 case 0x22:
6537 case 0x23:
6538 case 0x26:
6539 case 0x2A:
6540 case 0x32:
6541 case 0x36:
6542 case 0x39:
6543 case 0x3A:
6544 do_vec_compare (cpu); return;
6545
6546 default:
6547 break;
6548 }
6549 }
6550
6551 if (INSTR (31, 21) == 0x370)
6552 {
6553 do_vec_MOV_element (cpu);
6554 return;
6555 }
6556
6557 switch (INSTR (21, 10))
6558 {
6559 case 0x82E: do_vec_neg (cpu); return;
6560 case 0x87E: do_vec_sqrt (cpu); return;
6561 default:
6562 if (INSTR (15, 10) == 0x30)
6563 {
6564 do_vec_mull (cpu);
6565 return;
6566 }
6567 break;
6568 }
6569 break;
6570
6571 default:
6572 break;
6573 }
6574
6575 HALT_NYI;
6576 }
6577
6578 /* 3 sources. */
6579
6580 /* Float multiply add. */
6581 static void
6582 fmadds (sim_cpu *cpu)
6583 {
6584 unsigned sa = INSTR (14, 10);
6585 unsigned sm = INSTR (20, 16);
6586 unsigned sn = INSTR ( 9, 5);
6587 unsigned sd = INSTR ( 4, 0);
6588
6589 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6590 + aarch64_get_FP_float (cpu, sn)
6591 * aarch64_get_FP_float (cpu, sm));
6592 }
6593
6594 /* Double multiply add. */
6595 static void
6596 fmaddd (sim_cpu *cpu)
6597 {
6598 unsigned sa = INSTR (14, 10);
6599 unsigned sm = INSTR (20, 16);
6600 unsigned sn = INSTR ( 9, 5);
6601 unsigned sd = INSTR ( 4, 0);
6602
6603 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6604 + aarch64_get_FP_double (cpu, sn)
6605 * aarch64_get_FP_double (cpu, sm));
6606 }
6607
6608 /* Float multiply subtract. */
6609 static void
6610 fmsubs (sim_cpu *cpu)
6611 {
6612 unsigned sa = INSTR (14, 10);
6613 unsigned sm = INSTR (20, 16);
6614 unsigned sn = INSTR ( 9, 5);
6615 unsigned sd = INSTR ( 4, 0);
6616
6617 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6618 - aarch64_get_FP_float (cpu, sn)
6619 * aarch64_get_FP_float (cpu, sm));
6620 }
6621
6622 /* Double multiply subtract. */
6623 static void
6624 fmsubd (sim_cpu *cpu)
6625 {
6626 unsigned sa = INSTR (14, 10);
6627 unsigned sm = INSTR (20, 16);
6628 unsigned sn = INSTR ( 9, 5);
6629 unsigned sd = INSTR ( 4, 0);
6630
6631 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6632 - aarch64_get_FP_double (cpu, sn)
6633 * aarch64_get_FP_double (cpu, sm));
6634 }
6635
6636 /* Float negative multiply add. */
6637 static void
6638 fnmadds (sim_cpu *cpu)
6639 {
6640 unsigned sa = INSTR (14, 10);
6641 unsigned sm = INSTR (20, 16);
6642 unsigned sn = INSTR ( 9, 5);
6643 unsigned sd = INSTR ( 4, 0);
6644
6645 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6646 + (- aarch64_get_FP_float (cpu, sn))
6647 * aarch64_get_FP_float (cpu, sm));
6648 }
6649
6650 /* Double negative multiply add. */
6651 static void
6652 fnmaddd (sim_cpu *cpu)
6653 {
6654 unsigned sa = INSTR (14, 10);
6655 unsigned sm = INSTR (20, 16);
6656 unsigned sn = INSTR ( 9, 5);
6657 unsigned sd = INSTR ( 4, 0);
6658
6659 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6660 + (- aarch64_get_FP_double (cpu, sn))
6661 * aarch64_get_FP_double (cpu, sm));
6662 }
6663
6664 /* Float negative multiply subtract. */
6665 static void
6666 fnmsubs (sim_cpu *cpu)
6667 {
6668 unsigned sa = INSTR (14, 10);
6669 unsigned sm = INSTR (20, 16);
6670 unsigned sn = INSTR ( 9, 5);
6671 unsigned sd = INSTR ( 4, 0);
6672
6673 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6674 + aarch64_get_FP_float (cpu, sn)
6675 * aarch64_get_FP_float (cpu, sm));
6676 }
6677
6678 /* Double negative multiply subtract. */
6679 static void
6680 fnmsubd (sim_cpu *cpu)
6681 {
6682 unsigned sa = INSTR (14, 10);
6683 unsigned sm = INSTR (20, 16);
6684 unsigned sn = INSTR ( 9, 5);
6685 unsigned sd = INSTR ( 4, 0);
6686
6687 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
6688 + aarch64_get_FP_double (cpu, sn)
6689 * aarch64_get_FP_double (cpu, sm));
6690 }
6691
6692 static void
6693 dexSimpleFPDataProc3Source (sim_cpu *cpu)
6694 {
6695 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
6696 instr[30] = 0
6697 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
6698 instr[28,25] = 1111
6699 instr[24] = 1
6700 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
6701 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
6702 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
6703
6704 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
6705 /* dispatch on combined type:o1:o2. */
6706 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
6707
6708 if (M_S != 0)
6709 HALT_UNALLOC;
6710
6711 switch (dispatch)
6712 {
6713 case 0: fmadds (cpu); return;
6714 case 1: fmsubs (cpu); return;
6715 case 2: fnmadds (cpu); return;
6716 case 3: fnmsubs (cpu); return;
6717 case 4: fmaddd (cpu); return;
6718 case 5: fmsubd (cpu); return;
6719 case 6: fnmaddd (cpu); return;
6720 case 7: fnmsubd (cpu); return;
6721 default:
6722 /* type > 1 is currently unallocated. */
6723 HALT_UNALLOC;
6724 }
6725 }
6726
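/* Worked example (illustrative): FMSUB D0, D1, D2, D3 has type = 01,
   o1 = 0 and o2 = 1, so dispatch = (010 << 1) | 1 = 5 and fmsubd
   computes D0 = D3 - D1 * D2.  */
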
6727 static void
6728 dexSimpleFPFixedConvert (sim_cpu *cpu)
6729 {
6730 HALT_NYI;
6731 }
6732
6733 static void
6734 dexSimpleFPCondCompare (sim_cpu *cpu)
6735 {
6736 /* instr [31,23] = 0001 1110 0
6737 instr [22] = type
6738 instr [21] = 1
6739 instr [20,16] = Rm
6740 instr [15,12] = condition
6741 instr [11,10] = 01
6742 instr [9,5] = Rn
6743 instr [4] = 0
6744 instr [3,0] = nzcv */
6745
6746 unsigned rm = INSTR (20, 16);
6747 unsigned rn = INSTR (9, 5);
6748
6749 NYI_assert (31, 23, 0x3C);
6750 NYI_assert (11, 10, 0x1);
6751 NYI_assert (4, 4, 0);
6752
6753 if (! testConditionCode (cpu, INSTR (15, 12)))
6754 {
6755 aarch64_set_CPSR (cpu, INSTR (3, 0));
6756 return;
6757 }
6758
6759 if (INSTR (22, 22))
6760 {
6761 /* Double precision. */
6762 double val1 = aarch64_get_vec_double (cpu, rn, 0);
6763 double val2 = aarch64_get_vec_double (cpu, rm, 0);
6764
6765 /* FIXME: Check for NaNs. */
6766 if (val1 == val2)
6767 aarch64_set_CPSR (cpu, (Z | C));
6768 else if (val1 < val2)
6769 aarch64_set_CPSR (cpu, N);
6770 else /* val1 > val2 */
6771 aarch64_set_CPSR (cpu, C);
6772 }
6773 else
6774 {
6775 /* Single precision. */
6776 float val1 = aarch64_get_vec_float (cpu, rn, 0);
6777 float val2 = aarch64_get_vec_float (cpu, rm, 0);
6778
6779 /* FIXME: Check for NaNs. */
6780 if (val1 == val2)
6781 aarch64_set_CPSR (cpu, (Z | C));
6782 else if (val1 < val2)
6783 aarch64_set_CPSR (cpu, N);
6784 else /* val1 > val2 */
6785 aarch64_set_CPSR (cpu, C);
6786 }
6787 }
6788
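/* Worked example (illustrative): FCCMP D1, D2, #0x4, NE compares D1
   with D2 only when the NE condition holds; otherwise NZCV is loaded
   directly from the literal, here 0x4, i.e. Z set and N, C, V clear.  */
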
6789 /* 2 sources. */
6790
6791 /* Float add. */
6792 static void
6793 fadds (sim_cpu *cpu)
6794 {
6795 unsigned sm = INSTR (20, 16);
6796 unsigned sn = INSTR ( 9, 5);
6797 unsigned sd = INSTR ( 4, 0);
6798
6799 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6800 + aarch64_get_FP_float (cpu, sm));
6801 }
6802
6803 /* Double add. */
6804 static void
6805 faddd (sim_cpu *cpu)
6806 {
6807 unsigned sm = INSTR (20, 16);
6808 unsigned sn = INSTR ( 9, 5);
6809 unsigned sd = INSTR ( 4, 0);
6810
6811 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6812 + aarch64_get_FP_double (cpu, sm));
6813 }
6814
6815 /* Float divide. */
6816 static void
6817 fdivs (sim_cpu *cpu)
6818 {
6819 unsigned sm = INSTR (20, 16);
6820 unsigned sn = INSTR ( 9, 5);
6821 unsigned sd = INSTR ( 4, 0);
6822
6823 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6824 / aarch64_get_FP_float (cpu, sm));
6825 }
6826
6827 /* Double divide. */
6828 static void
6829 fdivd (sim_cpu *cpu)
6830 {
6831 unsigned sm = INSTR (20, 16);
6832 unsigned sn = INSTR ( 9, 5);
6833 unsigned sd = INSTR ( 4, 0);
6834
6835 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6836 / aarch64_get_FP_double (cpu, sm));
6837 }
6838
6839 /* Float multiply. */
6840 static void
6841 fmuls (sim_cpu *cpu)
6842 {
6843 unsigned sm = INSTR (20, 16);
6844 unsigned sn = INSTR ( 9, 5);
6845 unsigned sd = INSTR ( 4, 0);
6846
6847 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6848 * aarch64_get_FP_float (cpu, sm));
6849 }
6850
6851 /* Double multiply. */
6852 static void
6853 fmuld (sim_cpu *cpu)
6854 {
6855 unsigned sm = INSTR (20, 16);
6856 unsigned sn = INSTR ( 9, 5);
6857 unsigned sd = INSTR ( 4, 0);
6858
6859 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6860 * aarch64_get_FP_double (cpu, sm));
6861 }
6862
6863 /* Float negate and multiply. */
6864 static void
6865 fnmuls (sim_cpu *cpu)
6866 {
6867 unsigned sm = INSTR (20, 16);
6868 unsigned sn = INSTR ( 9, 5);
6869 unsigned sd = INSTR ( 4, 0);
6870
6871 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
6872 * aarch64_get_FP_float (cpu, sm)));
6873 }
6874
6875 /* Double negate and multiply. */
6876 static void
6877 fnmuld (sim_cpu *cpu)
6878 {
6879 unsigned sm = INSTR (20, 16);
6880 unsigned sn = INSTR ( 9, 5);
6881 unsigned sd = INSTR ( 4, 0);
6882
6883 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
6884 * aarch64_get_FP_double (cpu, sm)));
6885 }
6886
6887 /* Float subtract. */
6888 static void
6889 fsubs (sim_cpu *cpu)
6890 {
6891 unsigned sm = INSTR (20, 16);
6892 unsigned sn = INSTR ( 9, 5);
6893 unsigned sd = INSTR ( 4, 0);
6894
6895 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
6896 - aarch64_get_FP_float (cpu, sm));
6897 }
6898
6899 /* Double subtract. */
6900 static void
6901 fsubd (sim_cpu *cpu)
6902 {
6903 unsigned sm = INSTR (20, 16);
6904 unsigned sn = INSTR ( 9, 5);
6905 unsigned sd = INSTR ( 4, 0);
6906
6907 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
6908 - aarch64_get_FP_double (cpu, sm));
6909 }
6910
6911 static void
6912 do_FMINNM (sim_cpu *cpu)
6913 {
6914 /* instr[31,23] = 0 0011 1100
6915 instr[22] = float(0)/double(1)
6916 instr[21] = 1
6917 instr[20,16] = Sm
6918 instr[15,10] = 01 1110
6919 instr[9,5] = Sn
6920 instr[4,0] = Sd */
6921
6922 unsigned sm = INSTR (20, 16);
6923 unsigned sn = INSTR ( 9, 5);
6924 unsigned sd = INSTR ( 4, 0);
6925
6926 NYI_assert (31, 23, 0x03C);
6927 NYI_assert (15, 10, 0x1E);
6928
6929 if (INSTR (22, 22))
6930 aarch64_set_FP_double (cpu, sd,
6931 dminnm (aarch64_get_FP_double (cpu, sn),
6932 aarch64_get_FP_double (cpu, sm)));
6933 else
6934 aarch64_set_FP_float (cpu, sd,
6935 fminnm (aarch64_get_FP_float (cpu, sn),
6936 aarch64_get_FP_float (cpu, sm)));
6937 }
6938
6939 static void
6940 do_FMAXNM (sim_cpu *cpu)
6941 {
6942 /* instr[31,23] = 0 0011 1100
6943 instr[22] = float(0)/double(1)
6944 instr[21] = 1
6945 instr[20,16] = Sm
6946 instr[15,10] = 01 1010
6947 instr[9,5] = Sn
6948 instr[4,0] = Sd */
6949
6950 unsigned sm = INSTR (20, 16);
6951 unsigned sn = INSTR ( 9, 5);
6952 unsigned sd = INSTR ( 4, 0);
6953
6954 NYI_assert (31, 23, 0x03C);
6955 NYI_assert (15, 10, 0x1A);
6956
6957 if (INSTR (22, 22))
6958 aarch64_set_FP_double (cpu, sd,
6959 dmaxnm (aarch64_get_FP_double (cpu, sn),
6960 aarch64_get_FP_double (cpu, sm)));
6961 else
6962 aarch64_set_FP_float (cpu, sd,
6963 fmaxnm (aarch64_get_FP_float (cpu, sn),
6964 aarch64_get_FP_float (cpu, sm)));
6965 }
6966
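/* The fminnm/dminnm/fmaxnm/dmaxnm helpers used above are defined
   earlier in this file.  Under the IEEE 754-2008 minNum/maxNum
   semantics these instructions implement, a single quiet NaN operand
   is ignored, e.g. FMAXNM of 2.0 and a quiet NaN yields 2.0.  */
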
6967 static void
6968 dexSimpleFPDataProc2Source (sim_cpu *cpu)
6969 {
6970 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
6971 instr[30] = 0
6972 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
6973 instr[28,25] = 1111
6974 instr[24] = 0
6975 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
6976 instr[21] = 1
6977 instr[20,16] = Vm
6978 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
6979 0010 ==> FADD, 0011 ==> FSUB,
6980 0100 ==> FMAX, 0101 ==> FMIN
6981 0110 ==> FMAXNM, 0111 ==> FMINNM
6982 1000 ==> FNMUL, ow ==> UNALLOC
6983 instr[11,10] = 10
6984 instr[9,5] = Vn
6985 instr[4,0] = Vd */
6986
6987 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
6988 uint32_t type = INSTR (23, 22);
6989 /* Dispatch on opcode. */
6990 uint32_t dispatch = INSTR (15, 12);
6991
6992 if (type > 1)
6993 HALT_UNALLOC;
6994
6995 if (M_S != 0)
6996 HALT_UNALLOC;
6997
6998 if (type)
6999 switch (dispatch)
7000 {
7001 case 0: fmuld (cpu); return;
7002 case 1: fdivd (cpu); return;
7003 case 2: faddd (cpu); return;
7004 case 3: fsubd (cpu); return;
7005 case 6: do_FMAXNM (cpu); return;
7006 case 7: do_FMINNM (cpu); return;
7007 case 8: fnmuld (cpu); return;
7008
7009 /* Have not yet implemented fmax and fmin. */
7010 case 4:
7011 case 5:
7012 HALT_NYI;
7013
7014 default:
7015 HALT_UNALLOC;
7016 }
7017 else /* type == 0 => floats. */
7018 switch (dispatch)
7019 {
7020 case 0: fmuls (cpu); return;
7021 case 1: fdivs (cpu); return;
7022 case 2: fadds (cpu); return;
7023 case 3: fsubs (cpu); return;
7024 case 6: do_FMAXNM (cpu); return;
7025 case 7: do_FMINNM (cpu); return;
7026 case 8: fnmuls (cpu); return;
7027
7028 case 4:
7029 case 5:
7030 HALT_NYI;
7031
7032 default:
7033 HALT_UNALLOC;
7034 }
7035 }
7036
7037 static void
7038 dexSimpleFPCondSelect (sim_cpu *cpu)
7039 {
7040 /* FCSEL
7041 instr[31,23] = 0 0011 1100
7042 instr[22] = 0=>single 1=>double
7043 instr[21] = 1
7044 instr[20,16] = Sm
7045 instr[15,12] = cond
7046 instr[11,10] = 11
7047 instr[9,5] = Sn
7048 instr[4,0] = Sd */
7049 unsigned sm = INSTR (20, 16);
7050 unsigned sn = INSTR ( 9, 5);
7051 unsigned sd = INSTR ( 4, 0);
7052 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7053
7054 NYI_assert (31, 23, 0x03C);
7055 NYI_assert (11, 10, 0x3);
7056
7057 if (INSTR (22, 22))
7058 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, set ? sn : sm));
7059 else
7060 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, set ? sn : sm));
7061 }
7062
7063 /* Store 32 bit unscaled signed 9 bit. */
7064 static void
7065 fsturs (sim_cpu *cpu, int32_t offset)
7066 {
7067 unsigned int rn = INSTR (9, 5);
7068 unsigned int st = INSTR (4, 0);
7069
7070 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7071 aarch64_get_vec_u32 (cpu, st, 0));
7072 }
7073
7074 /* Store 64 bit unscaled signed 9 bit. */
7075 static void
7076 fsturd (sim_cpu *cpu, int32_t offset)
7077 {
7078 unsigned int rn = INSTR (9, 5);
7079 unsigned int st = INSTR (4, 0);
7080
7081 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7082 aarch64_get_vec_u64 (cpu, st, 0));
7083 }
7084
7085 /* Store 128 bit unscaled signed 9 bit. */
7086 static void
7087 fsturq (sim_cpu *cpu, int32_t offset)
7088 {
7089 unsigned int rn = INSTR (9, 5);
7090 unsigned int st = INSTR (4, 0);
7091 FRegister a;
7092
7093 aarch64_get_FP_long_double (cpu, st, & a);
7094 aarch64_set_mem_long_double (cpu,
7095 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7096 + offset, a);
7097 }
7098
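/* Addressing sketch for the three stores above (illustrative): the
   9-bit offset is signed and unscaled, so STUR D0, [X1, #-8] arrives
   here with offset = -8 and stores the low 64 bits of V0 at X1 - 8.  */
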
7099 /* TODO FP move register. */
7100
7101 /* 32 bit fp to fp move register. */
7102 static void
7103 ffmovs (sim_cpu *cpu)
7104 {
7105 unsigned int rn = INSTR (9, 5);
7106 unsigned int st = INSTR (4, 0);
7107
7108 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7109 }
7110
7111 /* 64 bit fp to fp move register. */
7112 static void
7113 ffmovd (sim_cpu *cpu)
7114 {
7115 unsigned int rn = INSTR (9, 5);
7116 unsigned int st = INSTR (4, 0);
7117
7118 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7119 }
7120
7121 /* 32 bit GReg to Vec move register. */
7122 static void
7123 fgmovs (sim_cpu *cpu)
7124 {
7125 unsigned int rn = INSTR (9, 5);
7126 unsigned int st = INSTR (4, 0);
7127
7128 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7129 }
7130
7131 /* 64 bit g to fp move register. */
7132 static void
7133 fgmovd (sim_cpu *cpu)
7134 {
7135 unsigned int rn = INSTR (9, 5);
7136 unsigned int st = INSTR (4, 0);
7137
7138 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7139 }
7140
7141 /* 32 bit fp to g move register. */
7142 static void
7143 gfmovs (sim_cpu *cpu)
7144 {
7145 unsigned int rn = INSTR (9, 5);
7146 unsigned int st = INSTR (4, 0);
7147
7148 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7149 }
7150
7151 /* 64 bit fp to g move register. */
7152 static void
7153 gfmovd (sim_cpu *cpu)
7154 {
7155 unsigned int rn = INSTR (9, 5);
7156 unsigned int st = INSTR (4, 0);
7157
7158 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7159 }
7160
7161 /* FP move immediate
7162
7163 These install an immediate 8 bit value in the target register
7164 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7165 bit exponent. */
7166
7167 static void
7168 fmovs (sim_cpu *cpu)
7169 {
7170 unsigned int sd = INSTR (4, 0);
7171 uint32_t imm = INSTR (20, 13);
7172 float f = fp_immediate_for_encoding_32 (imm);
7173
7174 aarch64_set_FP_float (cpu, sd, f);
7175 }
7176
7177 static void
7178 fmovd (sim_cpu *cpu)
7179 {
7180 unsigned int sd = INSTR (4, 0);
7181 uint32_t imm = INSTR (20, 13);
7182 double d = fp_immediate_for_encoding_64 (imm);
7183
7184 aarch64_set_FP_double (cpu, sd, d);
7185 }
7186
7187 static void
7188 dexSimpleFPImmediate (sim_cpu *cpu)
7189 {
7190 /* instr[31,23] == 00111100
7191 instr[22] == type : single(0)/double(1)
7192 instr[21] == 1
7193 instr[20,13] == imm8
7194 instr[12,10] == 100
7195 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7196 instr[4,0] == Rd */
7197 uint32_t imm5 = INSTR (9, 5);
7198
7199 NYI_assert (31, 23, 0x3C);
7200
7201 if (imm5 != 0)
7202 HALT_UNALLOC;
7203
7204 if (INSTR (22, 22))
7205 fmovd (cpu);
7206 else
7207 fmovs (cpu);
7208 }
7209
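/* Worked example (illustrative): imm8 = 0x70 (sign 0, exponent field
   111, fraction 0000) expands to 1.0 and imm8 = 0x00 expands to 2.0,
   so FMOV S0, #1.0 carries 0x70 in instr[20,13].  */
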
7210 /* TODO specific decode and execute for group Load Store. */
7211
7212 /* TODO FP load/store single register (unscaled offset). */
7213
7214 /* TODO load 8 bit unscaled signed 9 bit. */
7215 /* TODO load 16 bit unscaled signed 9 bit. */
7216
7217 /* Load 32 bit unscaled signed 9 bit. */
7218 static void
7219 fldurs (sim_cpu *cpu, int32_t offset)
7220 {
7221 unsigned int rn = INSTR (9, 5);
7222 unsigned int st = INSTR (4, 0);
7223
7224 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7225 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7226 }
7227
7228 /* Load 64 bit unscaled signed 9 bit. */
7229 static void
7230 fldurd (sim_cpu *cpu, int32_t offset)
7231 {
7232 unsigned int rn = INSTR (9, 5);
7233 unsigned int st = INSTR (4, 0);
7234
7235 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7236 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7237 }
7238
7239 /* Load 128 bit unscaled signed 9 bit. */
7240 static void
7241 fldurq (sim_cpu *cpu, int32_t offset)
7242 {
7243 unsigned int rn = INSTR (9, 5);
7244 unsigned int st = INSTR (4, 0);
7245 FRegister a;
7246 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7247
7248 aarch64_get_mem_long_double (cpu, addr, & a);
7249 aarch64_set_FP_long_double (cpu, st, a);
7250 }
7251
7252 /* TODO store 8 bit unscaled signed 9 bit. */
7253 /* TODO store 16 bit unscaled signed 9 bit. */
7254
7255
7256 /* 1 source. */
7257
7258 /* Float absolute value. */
7259 static void
7260 fabss (sim_cpu *cpu)
7261 {
7262 unsigned sn = INSTR (9, 5);
7263 unsigned sd = INSTR (4, 0);
7264 float value = aarch64_get_FP_float (cpu, sn);
7265
7266 aarch64_set_FP_float (cpu, sd, fabsf (value));
7267 }
7268
7269 /* Double absolute value. */
7270 static void
7271 fabcpu (sim_cpu *cpu)
7272 {
7273 unsigned sn = INSTR (9, 5);
7274 unsigned sd = INSTR (4, 0);
7275 double value = aarch64_get_FP_double (cpu, sn);
7276
7277 aarch64_set_FP_double (cpu, sd, fabs (value));
7278 }
7279
7280 /* Float negative value. */
7281 static void
7282 fnegs (sim_cpu *cpu)
7283 {
7284 unsigned sn = INSTR (9, 5);
7285 unsigned sd = INSTR (4, 0);
7286
7287 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7288 }
7289
7290 /* Double negative value. */
7291 static void
7292 fnegd (sim_cpu *cpu)
7293 {
7294 unsigned sn = INSTR (9, 5);
7295 unsigned sd = INSTR (4, 0);
7296
7297 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7298 }
7299
7300 /* Float square root. */
7301 static void
7302 fsqrts (sim_cpu *cpu)
7303 {
7304 unsigned sn = INSTR (9, 5);
7305 unsigned sd = INSTR (4, 0);
7306
7307 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7308 }
7309
7310 /* Double square root. */
7311 static void
7312 fsqrtd (sim_cpu *cpu)
7313 {
7314 unsigned sn = INSTR (9, 5);
7315 unsigned sd = INSTR (4, 0);
7316
7317 aarch64_set_FP_double (cpu, sd,
7318 sqrt (aarch64_get_FP_double (cpu, sn)));
7319 }
7320
7321 /* Convert double to float. */
7322 static void
7323 fcvtds (sim_cpu *cpu)
7324 {
7325 unsigned sn = INSTR (9, 5);
7326 unsigned sd = INSTR (4, 0);
7327
7328 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7329 }
7330
7331 /* Convert float to double. */
7332 static void
7333 fcvtcpu (sim_cpu *cpu)
7334 {
7335 unsigned sn = INSTR (9, 5);
7336 unsigned sd = INSTR (4, 0);
7337
7338 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7339 }
7340
7341 static void
7342 do_FRINT (sim_cpu *cpu)
7343 {
7344 /* instr[31,23] = 0001 1110 0
7345 instr[22] = single(0)/double(1)
7346 instr[21,18] = 1001
7347 instr[17,15] = rounding mode
7348 instr[14,10] = 10000
7349 instr[9,5] = source
7350 instr[4,0] = dest */
7351
7352 float val;
7353 unsigned rs = INSTR (9, 5);
7354 unsigned rd = INSTR (4, 0);
7355 unsigned int rmode = INSTR (17, 15);
7356
7357 NYI_assert (31, 23, 0x03C);
7358 NYI_assert (21, 18, 0x9);
7359 NYI_assert (14, 10, 0x10);
7360
7361 if (rmode == 6 || rmode == 7)
7362 /* FIXME: Add support for rmode == 6 exactness check. */
7363 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7364
7365 if (INSTR (22, 22))
7366 {
7367 double val = aarch64_get_FP_double (cpu, rs);
7368
7369 switch (rmode)
7370 {
7371 case 0: /* mode N: nearest or even. */
7372 {
7373 double rval = round (val);
7374
7375 /* Round ties to even: round rounds ties away from
7376 zero, so step back towards zero when that yields
7377 an odd result. */
7378 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7379 rval -= copysign (1.0, val);
7380
7381 aarch64_set_FP_double (cpu, rd, rval);
7382 return;
7383 }
7384
7385 case 1: /* mode P: towards +inf. */
7386 aarch64_set_FP_double (cpu, rd, ceil (val));
7387 return;
7388
7389 case 2: /* mode M: towards -inf. */
7390 aarch64_set_FP_double (cpu, rd, floor (val));
7391 return;
7398
7399 case 3: /* mode Z: towards 0. */
7400 aarch64_set_FP_double (cpu, rd, trunc (val));
7401 return;
7402
7403 case 4: /* mode A: away from 0. */
7404 aarch64_set_FP_double (cpu, rd, round (val));
7405 return;
7406
7407 case 6: /* mode X: use FPCR with exactness check. */
7408 case 7: /* mode I: use FPCR mode. */
7409 HALT_NYI;
7410
7411 default:
7412 HALT_UNALLOC;
7413 }
7414 }
7415
7416 val = aarch64_get_FP_float (cpu, rs);
7417
7418 switch (rmode)
7419 {
7420 case 0: /* mode N: nearest or even. */
7421 {
7422 float rval = roundf (val);
7423
7424 /* Round ties to even, as in the double case above. */
7425 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
7426 rval -= copysignf (1.0f, val);
7427
7428 aarch64_set_FP_float (cpu, rd, rval);
7429 return;
7430 }
7433
7434 case 1: /* mode P: towards +inf. */
7435 aarch64_set_FP_float (cpu, rd, ceilf (val));
7436 return;
7437
7438 case 2: /* mode M: towards -inf. */
7439 aarch64_set_FP_float (cpu, rd, floorf (val));
7440 return;
7447
7448 case 3: /* mode Z: towards 0. */
7449 aarch64_set_FP_float (cpu, rd, truncf (val));
7450 return;
7451
7452 case 4: /* mode A: away from 0. */
7453 aarch64_set_FP_float (cpu, rd, roundf (val));
7454 return;
7455
7456 case 6: /* mode X: use FPCR with exactness check. */
7457 case 7: /* mode I: use FPCR mode. */
7458 HALT_NYI;
7459
7460 default:
7461 HALT_UNALLOC;
7462 }
7463 }
7464
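/* Summary of the rounding modes above on a tied value (illustrative),
   for val = 2.5 / -2.5:
     mode N (nearest, ties to even)  2.0 / -2.0
     mode P (towards +inf)           3.0 / -2.0
     mode M (towards -inf)           2.0 / -3.0
     mode Z (towards zero)           2.0 / -2.0
     mode A (ties away from zero)    3.0 / -3.0  */
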
7465 /* Convert half to float. */
7466 static void
7467 do_FCVT_half_to_single (sim_cpu *cpu)
7468 {
7469 unsigned rn = INSTR (9, 5);
7470 unsigned rd = INSTR (4, 0);
7471
7472 NYI_assert (31, 10, 0x7B890);
7473
7474 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7475 }
7476
7477 /* Convert half to double. */
7478 static void
7479 do_FCVT_half_to_double (sim_cpu *cpu)
7480 {
7481 unsigned rn = INSTR (9, 5);
7482 unsigned rd = INSTR (4, 0);
7483
7484 NYI_assert (31, 10, 0x7B8B0);
7485
7486 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7487 }
7488
7489 static void
7490 do_FCVT_single_to_half (sim_cpu *cpu)
7491 {
7492 unsigned rn = INSTR (9, 5);
7493 unsigned rd = INSTR (4, 0);
7494
7495 NYI_assert (31, 10, 0x788F0);
7496
7497 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7498 }
7499
7500 /* Convert double to half. */
7501 static void
7502 do_FCVT_double_to_half (sim_cpu *cpu)
7503 {
7504 unsigned rn = INSTR (9, 5);
7505 unsigned rd = INSTR (4, 0);
7506
7507 NYI_assert (31, 10, 0x798F0);
7508
7509 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7510 }
7511
7512 static void
7513 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7514 {
7515 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7516 instr[30] = 0
7517 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7518 instr[28,25] = 1111
7519 instr[24] = 0
7520 instr[23,22] ==> type : 00 ==> source is single,
7521 01 ==> source is double
7522 10 ==> UNALLOC
7523 11 ==> UNALLOC or source is half
7524 instr[21] = 1
7525 instr[20,15] ==> opcode : with type 00 or 01
7526 000000 ==> FMOV, 000001 ==> FABS,
7527 000010 ==> FNEG, 000011 ==> FSQRT,
7528 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7529 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7530 001000 ==> FRINTN, 001001 ==> FRINTP,
7531 001010 ==> FRINTM, 001011 ==> FRINTZ,
7532 001100 ==> FRINTA, 001101 ==> UNALLOC
7533 001110 ==> FRINTX, 001111 ==> FRINTI
7534 with type 11
7535 000100 ==> FCVT (half-to-single)
7536 000101 ==> FCVT (half-to-double)
7537 instr[14,10] = 10000. */
7538
7539 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7540 uint32_t type = INSTR (23, 22);
7541 uint32_t opcode = INSTR (20, 15);
7542
7543 if (M_S != 0)
7544 HALT_UNALLOC;
7545
7546 if (type == 3)
7547 {
7548 if (opcode == 4)
7549 do_FCVT_half_to_single (cpu);
7550 else if (opcode == 5)
7551 do_FCVT_half_to_double (cpu);
7552 else
7553 HALT_UNALLOC;
7554 return;
7555 }
7556
7557 if (type == 2)
7558 HALT_UNALLOC;
7559
7560 switch (opcode)
7561 {
7562 case 0:
7563 if (type)
7564 ffmovd (cpu);
7565 else
7566 ffmovs (cpu);
7567 return;
7568
7569 case 1:
7570 if (type)
7571 fabcpu (cpu);
7572 else
7573 fabss (cpu);
7574 return;
7575
7576 case 2:
7577 if (type)
7578 fnegd (cpu);
7579 else
7580 fnegs (cpu);
7581 return;
7582
7583 case 3:
7584 if (type)
7585 fsqrtd (cpu);
7586 else
7587 fsqrts (cpu);
7588 return;
7589
7590 case 4:
7591 if (type)
7592 fcvtds (cpu);
7593 else
7594 HALT_UNALLOC;
7595 return;
7596
7597 case 5:
7598 if (type)
7599 HALT_UNALLOC;
7600 fcvtcpu (cpu);
7601 return;
7602
7603 case 8: /* FRINTN etc. */
7604 case 9:
7605 case 10:
7606 case 11:
7607 case 12:
7608 case 14:
7609 case 15:
7610 do_FRINT (cpu);
7611 return;
7612
7613 case 7:
7614 if (INSTR (22, 22))
7615 do_FCVT_double_to_half (cpu);
7616 else
7617 do_FCVT_single_to_half (cpu);
7618 return;
7619
7620 case 13:
7621 HALT_NYI;
7622
7623 default:
7624 HALT_UNALLOC;
7625 }
7626 }
7627
7628 /* 32 bit signed int to float. */
7629 static void
7630 scvtf32 (sim_cpu *cpu)
7631 {
7632 unsigned rn = INSTR (9, 5);
7633 unsigned sd = INSTR (4, 0);
7634
7635 aarch64_set_FP_float
7636 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7637 }
7638
7639 /* signed int to float. */
7640 static void
7641 scvtf (sim_cpu *cpu)
7642 {
7643 unsigned rn = INSTR (9, 5);
7644 unsigned sd = INSTR (4, 0);
7645
7646 aarch64_set_FP_float
7647 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7648 }
7649
7650 /* 32 bit signed int to double. */
7651 static void
7652 scvtd32 (sim_cpu *cpu)
7653 {
7654 unsigned rn = INSTR (9, 5);
7655 unsigned sd = INSTR (4, 0);
7656
7657 aarch64_set_FP_double
7658 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
7659 }
7660
7661 /* signed int to double. */
7662 static void
7663 scvtd (sim_cpu *cpu)
7664 {
7665 unsigned rn = INSTR (9, 5);
7666 unsigned sd = INSTR (4, 0);
7667
7668 aarch64_set_FP_double
7669 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
7670 }
7671
7672 static const float FLOAT_INT_MAX = (float) INT_MAX;
7673 static const float FLOAT_INT_MIN = (float) INT_MIN;
7674 static const double DOUBLE_INT_MAX = (double) INT_MAX;
7675 static const double DOUBLE_INT_MIN = (double) INT_MIN;
7676 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
7677 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
7678 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
7679 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
7680
7681 /* Check for FP exception conditions:
7682 NaN raises IO
7683 Infinity raises IO
7684 Out of Range raises IO and IX and saturates value
7685 Denormal raises ID and IX and sets to zero. */
7686 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
7687 do \
7688 { \
7689 switch (fpclassify (F)) \
7690 { \
7691 case FP_INFINITE: \
7692 case FP_NAN: \
7693 aarch64_set_FPSR (cpu, IO); \
7694 if (signbit (F)) \
7695 VALUE = ITYPE##_MIN; \
7696 else \
7697 VALUE = ITYPE##_MAX; \
7698 break; \
7699 \
7700 case FP_NORMAL: \
7701 if (F >= FTYPE##_##ITYPE##_MAX) \
7702 { \
7703 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7704 VALUE = ITYPE##_MAX; \
7705 } \
7706 else if (F <= FTYPE##_##ITYPE##_MIN) \
7707 { \
7708 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
7709 VALUE = ITYPE##_MIN; \
7710 } \
7711 break; \
7712 \
7713 case FP_SUBNORMAL: \
7714 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
7715 VALUE = 0; \
7716 break; \
7717 \
7718 default: \
7719 case FP_ZERO: \
7720 VALUE = 0; \
7721 break; \
7722 } \
7723 } \
7724 while (0)
7725
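/* Worked example (illustrative): converting 3.0e10f to a 32-bit
   signed integer takes the FP_NORMAL branch above; 3.0e10 exceeds
   FLOAT_INT_MAX, so the result saturates to INT_MAX and the IO and
   IX cumulative flags are set in the FPSR.  */
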
7726 /* 32 bit convert float to signed int truncate towards zero. */
7727 static void
7728 fcvtszs32 (sim_cpu *cpu)
7729 {
7730 unsigned sn = INSTR (9, 5);
7731 unsigned rd = INSTR (4, 0);
7732 /* A C cast to an integer type truncates towards zero, as required here. */
7733 float f = aarch64_get_FP_float (cpu, sn);
7734 int32_t value = (int32_t) f;
7735
7736 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7737
7738 /* Avoid sign extension to 64 bit. */
7739 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7740 }
7741
7742 /* 64 bit convert float to signed int truncate towards zero. */
7743 static void
7744 fcvtszs (sim_cpu *cpu)
7745 {
7746 unsigned sn = INSTR (9, 5);
7747 unsigned rd = INSTR (4, 0);
7748 float f = aarch64_get_FP_float (cpu, sn);
7749 int64_t value = (int64_t) f;
7750
7751 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7752
7753 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7754 }
7755
7756 /* 32 bit convert double to signed int truncate towards zero. */
7757 static void
7758 fcvtszd32 (sim_cpu *cpu)
7759 {
7760 unsigned sn = INSTR (9, 5);
7761 unsigned rd = INSTR (4, 0);
7762 /* A C cast to an integer type truncates towards zero, as required here. */
7763 double d = aarch64_get_FP_double (cpu, sn);
7764 int32_t value = (int32_t) d;
7765
7766 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7767
7768 /* Avoid sign extension to 64 bit. */
7769 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
7770 }
7771
7772 /* 64 bit convert double to signed int truncate towards zero. */
7773 static void
7774 fcvtszd (sim_cpu *cpu)
7775 {
7776 unsigned sn = INSTR (9, 5);
7777 unsigned rd = INSTR (4, 0);
7778 /* A C cast to an integer type truncates towards zero, as required here. */
7779 double d = aarch64_get_FP_double (cpu, sn);
7780 int64_t value;
7781
7782 value = (int64_t) d;
7783
7784 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7785
7786 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
7787 }
7788
7789 static void
7790 do_fcvtzu (sim_cpu *cpu)
7791 {
7792 /* instr[31] = size: 32-bit (0), 64-bit (1)
7793 instr[30,23] = 00111100
7794 instr[22] = type: single (0)/ double (1)
7795 instr[21] = 0 ==> fixed-point conversion (NYI), 1 ==> integer
7796 instr[20,16] = 11001
7797 instr[15,10] = precision
7798 instr[9,5] = Rs
7799 instr[4,0] = Rd. */
7800
7801 unsigned rs = INSTR (9, 5);
7802 unsigned rd = INSTR (4, 0);
7803
7804 NYI_assert (30, 23, 0x3C);
7805 NYI_assert (20, 16, 0x19);
7806
7807 if (INSTR (21, 21) != 1)
7808 /* Convert to fixed point. */
7809 HALT_NYI;
7810
7811 if (INSTR (31, 31))
7812 {
7813 /* Convert to unsigned 64-bit integer. */
7814 if (INSTR (22, 22))
7815 {
7816 double d = aarch64_get_FP_double (cpu, rs);
7817 uint64_t value = (uint64_t) d;
7818
7819 /* Do not raise an exception if the conversion has already saturated. */
7820 if (value != (1UL << 63))
7821 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
7822
7823 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7824 }
7825 else
7826 {
7827 float f = aarch64_get_FP_float (cpu, rs);
7828 uint64_t value = (uint64_t) f;
7829
7830 /* Do not raise an exception if the conversion has already saturated. */
7831 if (value != (1UL << 63))
7832 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
7833
7834 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7835 }
7836 }
7837 else
7838 {
7839 uint32_t value;
7840
7841 /* Convert to unsigned 32-bit integer. */
7842 if (INSTR (22, 22))
7843 {
7844 double d = aarch64_get_FP_double (cpu, rs);
7845
7846 value = (uint32_t) d;
7847 /* Do not raise an exception if the conversion has already saturated. */
7848 if (value != (1UL << 31))
7849 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
7850 }
7851 else
7852 {
7853 float f = aarch64_get_FP_float (cpu, rs);
7854
7855 value = (uint32_t) f;
7856 /* Do not raise an exception if the conversion has already saturated. */
7857 if (value != (1UL << 31))
7858 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
7859 }
7860
7861 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
7862 }
7863 }
7864
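/* The (1UL << 63) and (1UL << 31) tests above depend on host
   behaviour: formally an out-of-range float-to-unsigned conversion is
   undefined in C, but on the common hosts this simulator targets it
   yields exactly those bit patterns, which are therefore treated as
   already saturated.  */
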
7865 static void
7866 do_UCVTF (sim_cpu *cpu)
7867 {
7868 /* instr[31] = size: 32-bit (0), 64-bit (1)
7869 instr[30,23] = 001 1110 0
7870 instr[22] = type: single (0)/ double (1)
7871 instr[21] = 0 ==> fixed-point conversion (NYI), 1 ==> integer
7872 instr[20,16] = 0 0011
7873 instr[15,10] = precision
7874 instr[9,5] = Rs
7875 instr[4,0] = Rd. */
7876
7877 unsigned rs = INSTR (9, 5);
7878 unsigned rd = INSTR (4, 0);
7879
7880 NYI_assert (30, 23, 0x3C);
7881 NYI_assert (20, 16, 0x03);
7882
7883 if (INSTR (21, 21) != 1)
7884 HALT_NYI;
7885
7886 /* FIXME: Add exception raising. */
7887 if (INSTR (31, 31))
7888 {
7889 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
7890
7891 if (INSTR (22, 22))
7892 aarch64_set_FP_double (cpu, rd, (double) value);
7893 else
7894 aarch64_set_FP_float (cpu, rd, (float) value);
7895 }
7896 else
7897 {
7898 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
7899
7900 if (INSTR (22, 22))
7901 aarch64_set_FP_double (cpu, rd, (double) value);
7902 else
7903 aarch64_set_FP_float (cpu, rd, (float) value);
7904 }
7905 }
7906
7907 static void
7908 float_vector_move (sim_cpu *cpu)
7909 {
7910 /* instr[31,17] == 100 1111 0101 0111
7911 instr[16] ==> direction 0=> to GR, 1=> from GR
7912 instr[15,10] => ???
7913 instr[9,5] ==> source
7914 instr[4,0] ==> dest. */
7915 instr[15,10] = 00 0000 (ow UNALLOC)
7916 unsigned rn = INSTR (9, 5);
7917 unsigned rd = INSTR (4, 0);
7918
7919 NYI_assert (31, 17, 0x4F57);
7920
7921 if (INSTR (15, 10) != 0)
7922 HALT_UNALLOC;
7923
7924 if (INSTR (16, 16))
7925 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7926 else
7927 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
7928 }
7929
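/* Note that vector element 1 is used above: this is the
   FMOV Xd, Vn.D[1] / FMOV Vd.D[1], Xn form, transferring between a
   general register and the upper 64 bits of the SIMD register.  */
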
7930 static void
7931 dexSimpleFPIntegerConvert (sim_cpu *cpu)
7932 {
7933 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
7934 instr[30] = 0
7935 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
7936 instr[28,25] = 1111
7937 instr[24] = 0
7938 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7939 instr[21] = 1
7940 instr[20,19] = rmode
7941 instr[18,16] = opcode
7942 instr[15,10] = 10 0000 */
7943
7944 uint32_t rmode_opcode;
7945 uint32_t size_type;
7946 uint32_t type;
7947 uint32_t size;
7948 uint32_t S;
7949
7950 if (INSTR (31, 17) == 0x4F57)
7951 {
7952 float_vector_move (cpu);
7953 return;
7954 }
7955
7956 size = INSTR (31, 31);
7957 S = INSTR (29, 29);
7958 if (S != 0)
7959 HALT_UNALLOC;
7960
7961 type = INSTR (23, 22);
7962 if (type > 1)
7963 HALT_UNALLOC;
7964
7965 rmode_opcode = INSTR (20, 16);
7966 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
7967
7968 switch (rmode_opcode)
7969 {
7970 case 2: /* SCVTF. */
7971 switch (size_type)
7972 {
7973 case 0: scvtf32 (cpu); return;
7974 case 1: scvtd32 (cpu); return;
7975 case 2: scvtf (cpu); return;
7976 case 3: scvtd (cpu); return;
7977 }
7978
7979 case 6: /* FMOV GR, Vec. */
7980 switch (size_type)
7981 {
7982 case 0: gfmovs (cpu); return;
7983 case 3: gfmovd (cpu); return;
7984 default: HALT_UNALLOC;
7985 }
7986
7987 case 7: /* FMOV vec, GR. */
7988 switch (size_type)
7989 {
7990 case 0: fgmovs (cpu); return;
7991 case 3: fgmovd (cpu); return;
7992 default: HALT_UNALLOC;
7993 }
7994
7995 case 24: /* FCVTZS. */
7996 switch (size_type)
7997 {
7998 case 0: fcvtszs32 (cpu); return;
7999 case 1: fcvtszd32 (cpu); return;
8000 case 2: fcvtszs (cpu); return;
8001 case 3: fcvtszd (cpu); return;
8002 }
8003
8004 case 25: do_fcvtzu (cpu); return;
8005 case 3: do_UCVTF (cpu); return;
8006
8007 case 0: /* FCVTNS. */
8008 case 1: /* FCVTNU. */
8009 case 4: /* FCVTAS. */
8010 case 5: /* FCVTAU. */
8011 case 8: /* FCVTPS. */
8012 case 9: /* FCVTPU. */
8013 case 16: /* FCVTMS. */
8014 case 17: /* FCVTMU. */
8015 default:
8016 HALT_NYI;
8017 }
8018 }
8019
8020 static void
8021 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8022 {
8023 uint32_t flags;
8024
8025 if (isnan (fvalue1) || isnan (fvalue2))
8026 flags = C|V;
8027 else
8028 {
8029 float result = fvalue1 - fvalue2;
8030
8031 if (result == 0.0)
8032 flags = Z|C;
8033 else if (result < 0)
8034 flags = N;
8035 else /* (result > 0). */
8036 flags = C;
8037 }
8038
8039 aarch64_set_CPSR (cpu, flags);
8040 }
8041
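/* Resulting NZCV patterns (illustrative): equal -> Z|C = 0110,
   less than -> N = 1000, greater than -> C = 0010, unordered (either
   operand NaN) -> C|V = 0011, matching the architected FCMP result.  */

/* Float compare -- Invalid Operation exception only on signaling
   NaNs.  */
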
8042 static void
8043 fcmps (sim_cpu *cpu)
8044 {
8045 unsigned sm = INSTR (20, 16);
8046 unsigned sn = INSTR ( 9, 5);
8047
8048 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8049 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8050
8051 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8052 }
8053
8054 /* Float compare to zero -- Invalid Operation exception
8055 only on signaling NaNs. */
8056 static void
8057 fcmpzs (sim_cpu *cpu)
8058 {
8059 unsigned sn = INSTR ( 9, 5);
8060 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8061
8062 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8063 }
8064
8065 /* Float compare -- Invalid Operation exception on all NaNs. */
8066 static void
8067 fcmpes (sim_cpu *cpu)
8068 {
8069 unsigned sm = INSTR (20, 16);
8070 unsigned sn = INSTR ( 9, 5);
8071
8072 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8073 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8074
8075 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8076 }
8077
8078 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8079 static void
8080 fcmpzes (sim_cpu *cpu)
8081 {
8082 unsigned sn = INSTR ( 9, 5);
8083 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8084
8085 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8086 }
8087
8088 static void
8089 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8090 {
8091 uint32_t flags;
8092
8093 if (isnan (dval1) || isnan (dval2))
8094 flags = C|V;
8095 else
8096 {
8097 double result = dval1 - dval2;
8098
8099 if (result == 0.0)
8100 flags = Z|C;
8101 else if (result < 0)
8102 flags = N;
8103 else /* (result > 0). */
8104 flags = C;
8105 }
8106
8107 aarch64_set_CPSR (cpu, flags);
8108 }
8109
8110 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8111 static void
8112 fcmpd (sim_cpu *cpu)
8113 {
8114 unsigned sm = INSTR (20, 16);
8115 unsigned sn = INSTR ( 9, 5);
8116
8117 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8118 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8119
8120 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8121 }
8122
8123 /* Double compare to zero -- Invalid Operation exception
8124 only on signaling NaNs. */
8125 static void
8126 fcmpzd (sim_cpu *cpu)
8127 {
8128 unsigned sn = INSTR ( 9, 5);
8129 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8130
8131 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8132 }
8133
8134 /* Double compare -- Invalid Operation exception on all NaNs. */
8135 static void
8136 fcmped (sim_cpu *cpu)
8137 {
8138 unsigned sm = INSTR (20, 16);
8139 unsigned sn = INSTR ( 9, 5);
8140
8141 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8142 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8143
8144 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8145 }
8146
8147 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8148 static void
8149 fcmpzed (sim_cpu *cpu)
8150 {
8151 unsigned sn = INSTR ( 9, 5);
8152 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8153
8154 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8155 }
8156
8157 static void
8158 dexSimpleFPCompare (sim_cpu *cpu)
8159 {
8160 /* assert instr[28,25] == 1111
8161 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8162 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8163 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8164 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8165 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8166 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8167 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8168 ow ==> UNALLOC */
8169 uint32_t dispatch;
8170 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8171 uint32_t type = INSTR (23, 22);
8172 uint32_t op = INSTR (15, 14);
8173 uint32_t op2_2_0 = INSTR (2, 0);
8174
8175 if (op2_2_0 != 0)
8176 HALT_UNALLOC;
8177
8178 if (M_S != 0)
8179 HALT_UNALLOC;
8180
8181 if (type > 1)
8182 HALT_UNALLOC;
8183
8184 if (op != 0)
8185 HALT_UNALLOC;
8186
8187 /* dispatch on type and top 2 bits of opcode. */
8188 dispatch = (type << 2) | INSTR (4, 3);
8189
8190 switch (dispatch)
8191 {
8192 case 0: fcmps (cpu); return;
8193 case 1: fcmpzs (cpu); return;
8194 case 2: fcmpes (cpu); return;
8195 case 3: fcmpzes (cpu); return;
8196 case 4: fcmpd (cpu); return;
8197 case 5: fcmpzd (cpu); return;
8198 case 6: fcmped (cpu); return;
8199 case 7: fcmpzed (cpu); return;
8200 }
8201 }
8202
8203 static void
8204 do_scalar_FADDP (sim_cpu *cpu)
8205 {
8206 /* instr [31,23] = 0111 1110 0
8207 instr [22] = single(0)/double(1)
8208 instr [21,10] = 11 0000 1101 10
8209 instr [9,5] = Fn
8210 instr [4,0] = Fd. */
8211
8212 unsigned Fn = INSTR (9, 5);
8213 unsigned Fd = INSTR (4, 0);
8214
8215 NYI_assert (31, 23, 0x0FC);
8216 NYI_assert (21, 10, 0xC36);
8217
8218 if (INSTR (22, 22))
8219 {
8220 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8221 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8222
8223 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8224 }
8225 else
8226 {
8227 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8228 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8229
8230 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8231 }
8232 }
8233
8234 /* Floating point absolute difference. */
8235
8236 static void
8237 do_scalar_FABD (sim_cpu *cpu)
8238 {
8239 /* instr [31,23] = 0111 1110 1
8240 instr [22] = float(0)/double(1)
8241 instr [21] = 1
8242 instr [20,16] = Rm
8243 instr [15,10] = 1101 01
8244 instr [9, 5] = Rn
8245 instr [4, 0] = Rd. */
8246
8247 unsigned rm = INSTR (20, 16);
8248 unsigned rn = INSTR (9, 5);
8249 unsigned rd = INSTR (4, 0);
8250
8251 NYI_assert (31, 23, 0x0FD);
8252 NYI_assert (21, 21, 1);
8253 NYI_assert (15, 10, 0x35);
8254
8255 if (INSTR (22, 22))
8256 aarch64_set_FP_double (cpu, rd,
8257 fabs (aarch64_get_FP_double (cpu, rn)
8258 - aarch64_get_FP_double (cpu, rm)));
8259 else
8260 aarch64_set_FP_float (cpu, rd,
8261 fabsf (aarch64_get_FP_float (cpu, rn)
8262 - aarch64_get_FP_float (cpu, rm)));
8263 }
8264
8265 static void
8266 do_scalar_CMGT (sim_cpu *cpu)
8267 {
8268 /* instr [31,21] = 0101 1110 111
8269 instr [20,16] = Rm
8270 instr [15,10] = 00 1101
8271 instr [9, 5] = Rn
8272 instr [4, 0] = Rd. */
8273
8274 unsigned rm = INSTR (20, 16);
8275 unsigned rn = INSTR (9, 5);
8276 unsigned rd = INSTR (4, 0);
8277
8278 NYI_assert (31, 21, 0x2F7);
8279 NYI_assert (15, 10, 0x0D);
8280
8281 aarch64_set_vec_u64 (cpu, rd, 0,
8282 aarch64_get_vec_s64 (cpu, rn, 0) > /* CMGT is a signed compare. */
8283 aarch64_get_vec_s64 (cpu, rm, 0) ? -1L : 0L);
8284 }
8285
8286 static void
8287 do_scalar_USHR (sim_cpu *cpu)
8288 {
8289 /* instr [31,23] = 0111 1111 0
8290 instr [22,16] = shift amount
8291 instr [15,10] = 0000 01
8292 instr [9, 5] = Rn
8293 instr [4, 0] = Rd. */
8294
8295 unsigned amount = 128 - INSTR (22, 16);
8296 unsigned rn = INSTR (9, 5);
8297 unsigned rd = INSTR (4, 0);
8298
8299 NYI_assert (31, 23, 0x0FE);
8300 NYI_assert (15, 10, 0x01);
8301
8302 /* A C shift by 64 is undefined; USHR #64 simply yields zero. */
8303 aarch64_set_vec_u64 (cpu, rd, 0, amount < 64 ? aarch64_get_vec_u64 (cpu, rn, 0) >> amount : 0);
8304 }
8305
8306 static void
8307 do_scalar_SSHL (sim_cpu *cpu)
8308 {
8309 /* instr [31,21] = 0101 1110 111
8310 instr [20,16] = Rm
8311 instr [15,10] = 0100 01
8312 instr [9, 5] = Rn
8313 instr [4, 0] = Rd. */
8314
8315 unsigned rm = INSTR (20, 16);
8316 unsigned rn = INSTR (9, 5);
8317 unsigned rd = INSTR (4, 0);
8318 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8319
8320 NYI_assert (31, 21, 0x2F7);
8321 NYI_assert (15, 10, 0x11);
8322
8323 if (shift >= 0)
8324 aarch64_set_vec_s64 (cpu, rd, 0,
8325 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8326 else
8327 aarch64_set_vec_s64 (cpu, rd, 0,
8328 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8329 }
8330
8331 static void
8332 do_scalar_shift (sim_cpu *cpu)
8333 {
8334 /* instr [31,23] = 0101 1111 0
8335 instr [22,16] = shift amount
8336 instr [15,10] = 0101 01 [SHL]
8337 instr [15,10] = 0000 01 [SSHR]
8338 instr [9, 5] = Rn
8339 instr [4, 0] = Rd. */
8340
8341 unsigned rn = INSTR (9, 5);
8342 unsigned rd = INSTR (4, 0);
8343 unsigned amount;
8344
8345 NYI_assert (31, 23, 0x0BE);
8346
8347 if (INSTR (22, 22) == 0)
8348 HALT_UNALLOC;
8349
8350 switch (INSTR (15, 10))
8351 {
8352 case 0x01: /* SSHR */
8353 amount = 128 - INSTR (22, 16);
8354 /* A C shift by 64 is undefined; SSHR #64 replicates the sign bit, so clamp. */
8355 aarch64_set_vec_s64 (cpu, rd, 0, aarch64_get_vec_s64 (cpu, rn, 0) >> (amount > 63 ? 63 : amount));
8356 return;
8357 case 0x15: /* SHL */
8358 amount = INSTR (22, 16) - 64;
8359 aarch64_set_vec_u64 (cpu, rd, 0,
8360 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8361 return;
8362 default:
8363 HALT_NYI;
8364 }
8365 }
8366
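/* Decoding sketch (illustrative): instr[22,16] holds immh:immb, so
   SSHR D0, D1, #8 encodes 128 - 8 = 0x78 there, while SHL D0, D1, #8
   encodes 64 + 8 = 0x48; the two cases above invert those sums.  */
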
8367 /* FCMEQ FCMGT FCMGE. */
8368 static void
8369 do_scalar_FCM (sim_cpu *cpu)
8370 {
8371 /* instr [31,30] = 01
8372 instr [29] = U
8373 instr [28,24] = 1 1110
8374 instr [23] = E
8375 instr [22] = size
8376 instr [21] = 1
8377 instr [20,16] = Rm
8378 instr [15,12] = 1110
8379 instr [11] = AC
8380 instr [10] = 1
8381 instr [9, 5] = Rn
8382 instr [4, 0] = Rd. */
8383
8384 unsigned rm = INSTR (20, 16);
8385 unsigned rn = INSTR (9, 5);
8386 unsigned rd = INSTR (4, 0);
8387 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8388 unsigned result;
8389 float val1;
8390 float val2;
8391
8392 NYI_assert (31, 30, 1);
8393 NYI_assert (28, 24, 0x1E);
8394 NYI_assert (21, 21, 1);
8395 NYI_assert (15, 12, 0xE);
8396 NYI_assert (10, 10, 1);
8397
8398 if (INSTR (22, 22))
8399 {
8400 double val1 = aarch64_get_FP_double (cpu, rn);
8401 double val2 = aarch64_get_FP_double (cpu, rm);
8402
8403 switch (EUac)
8404 {
8405 case 0: /* 000 */
8406 result = val1 == val2;
8407 break;
8408
8409 case 3: /* 011 */
8410 val1 = fabs (val1);
8411 val2 = fabs (val2);
8412 /* Fall through. */
8413 case 2: /* 010 */
8414 result = val1 >= val2;
8415 break;
8416
8417 case 7: /* 111 */
8418 val1 = fabs (val1);
8419 val2 = fabs (val2);
8420 /* Fall through. */
8421 case 6: /* 110 */
8422 result = val1 > val2;
8423 break;
8424
8425 default:
8426 HALT_UNALLOC;
8427 }
8428
8429 aarch64_set_vec_u64 (cpu, rd, 0, result ? -1 : 0);
8430 return;
8431 }
8432
8433 val1 = aarch64_get_FP_float (cpu, rn);
8434 val2 = aarch64_get_FP_float (cpu, rm);
8435
8436 switch (EUac)
8437 {
8438 case 0: /* 000 */
8439 result = val1 == val2;
8440 break;
8441
8442 case 3: /* 011 */
8443 val1 = fabsf (val1);
8444 val2 = fabsf (val2);
8445 /* Fall through. */
8446 case 2: /* 010 */
8447 result = val1 >= val2;
8448 break;
8449
8450 case 7: /* 111 */
8451 val1 = fabsf (val1);
8452 val2 = fabsf (val2);
8453 /* Fall through. */
8454 case 6: /* 110 */
8455 result = val1 > val2;
8456 break;
8457
8458 default:
8459 HALT_UNALLOC;
8460 }
8461
8462 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8463 }
8464
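/* Mapping of the EUac dispatch value above (illustrative):
     000 -> FCMEQ (==)
     010 -> FCMGE (>=)    011 -> FACGE (absolute >=)
     110 -> FCMGT (>)     111 -> FACGT (absolute >)
   Any other combination halts as unallocated.  */
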
8465 /* An alias of DUP. */
8466 static void
8467 do_scalar_MOV (sim_cpu *cpu)
8468 {
8469 /* instr [31,21] = 0101 1110 000
8470 instr [20,16] = imm5
8471 instr [15,10] = 0000 01
8472 instr [9, 5] = Rn
8473 instr [4, 0] = Rd. */
8474
8475 unsigned rn = INSTR (9, 5);
8476 unsigned rd = INSTR (4, 0);
8477 unsigned index;
8478
8479 NYI_assert (31, 21, 0x2F0);
8480 NYI_assert (15, 10, 0x01);
8481
8482 if (INSTR (16, 16))
8483 {
8484 /* 8-bit. */
8485 index = INSTR (20, 17);
8486 aarch64_set_vec_u8
8487 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8488 }
8489 else if (INSTR (17, 17))
8490 {
8491 /* 16-bit. */
8492 index = INSTR (20, 18);
8493 aarch64_set_vec_u16
8494 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8495 }
8496 else if (INSTR (18, 18))
8497 {
8498 /* 32-bit. */
8499 index = INSTR (20, 19);
8500 aarch64_set_vec_u32
8501 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8502 }
8503 else if (INSTR (19, 19))
8504 {
8505 /* 64-bit. */
8506 index = INSTR (20, 20);
8507 aarch64_set_vec_u64
8508 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8509 }
8510 else
8511 HALT_UNALLOC;
8512 }
8513
8514 static void
8515 do_scalar_NEG (sim_cpu *cpu)
8516 {
8517 /* instr [31,10] = 0111 1110 1110 0000 1011 10
8518 instr [9, 5] = Rn
8519 instr [4, 0] = Rd. */
8520
8521 unsigned rn = INSTR (9, 5);
8522 unsigned rd = INSTR (4, 0);
8523
8524 NYI_assert (31, 10, 0x1FB82E);
8525
8526 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8527 }
8528
8529 static void
8530 do_scalar_USHL (sim_cpu *cpu)
8531 {
8532 /* instr [31,21] = 0111 1110 111
8533 instr [20,16] = Rm
8534 instr [15,10] = 0100 01
8535 instr [9, 5] = Rn
8536 instr [4, 0] = Rd. */
8537
8538 unsigned rm = INSTR (20, 16);
8539 unsigned rn = INSTR (9, 5);
8540 unsigned rd = INSTR (4, 0);
8541 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8542
8543 NYI_assert (31, 21, 0x3F7);
8544 NYI_assert (15, 10, 0x11);
8545
8546 if (shift >= 0)
8547 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8548 else
8549 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
8550 }
8551
8552 static void
8553 do_double_add (sim_cpu *cpu)
8554 {
8555 /* instr [31,21] = 0101 1110 111
8556 instr [20,16] = Fn
8557 instr [15,10] = 1000 01
8558 instr [9,5] = Fm
8559 instr [4,0] = Fd. */
8560 unsigned Fd;
8561 unsigned Fm;
8562 unsigned Fn;
8563 double val1;
8564 double val2;
8565
8566 NYI_assert (31, 21, 0x2F7);
8567 NYI_assert (15, 10, 0x21);
8568
8569 Fd = INSTR (4, 0);
8570 Fm = INSTR (9, 5);
8571 Fn = INSTR (20, 16);
8572
8573 val1 = aarch64_get_FP_double (cpu, Fm);
8574 val2 = aarch64_get_FP_double (cpu, Fn);
8575
8576 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8577 }
8578
8579 static void
8580 do_scalar_UCVTF (sim_cpu *cpu)
8581 {
8582 /* instr [31,23] = 0111 1110 0
8583 instr [22] = single(0)/double(1)
8584 instr [21,10] = 10 0001 1101 10
8585 instr [9,5] = rn
8586 instr [4,0] = rd. */
8587
8588 unsigned rn = INSTR (9, 5);
8589 unsigned rd = INSTR (4, 0);
8590
8591 NYI_assert (31, 23, 0x0FC);
8592 NYI_assert (21, 10, 0x876);
8593
8594 if (INSTR (22, 22))
8595 {
8596 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
8597
8598 aarch64_set_vec_double (cpu, rd, 0, (double) val);
8599 }
8600 else
8601 {
8602 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
8603
8604 aarch64_set_vec_float (cpu, rd, 0, (float) val);
8605 }
8606 }
8607
8608 static void
8609 do_scalar_vec (sim_cpu *cpu)
8610 {
8611 /* instr [30] = 1. */
8612 /* instr [28,25] = 1111. */
8613 switch (INSTR (31, 23))
8614 {
8615 case 0xBC:
8616 switch (INSTR (15, 10))
8617 {
8618 case 0x01: do_scalar_MOV (cpu); return;
8619 case 0x39: do_scalar_FCM (cpu); return;
8620 case 0x3B: do_scalar_FCM (cpu); return;
8621 }
8622 HALT_NYI;
8623
8624 case 0xBE: do_scalar_shift (cpu); return;
8625
8626 case 0xFC:
8627 switch (INSTR (15, 10))
8628 {
8629 case 0x36:
8630 switch (INSTR (21, 16))
8631 {
8632 case 0x30: do_scalar_FADDP (cpu); return;
8633 case 0x21: do_scalar_UCVTF (cpu); return;
8634 }
8635 HALT_NYI;
8636 case 0x39: do_scalar_FCM (cpu); return;
8637 case 0x3B: do_scalar_FCM (cpu); return;
8638 }
8639 HALT_NYI;
8640
8641 case 0xFD:
8642 switch (INSTR (15, 10))
8643 {
8644 case 0x0D: do_scalar_CMGT (cpu); return;
8645 case 0x11: do_scalar_USHL (cpu); return;
8646 case 0x2E: do_scalar_NEG (cpu); return;
8647 case 0x35: do_scalar_FABD (cpu); return;
8648 case 0x39: do_scalar_FCM (cpu); return;
8649 case 0x3B: do_scalar_FCM (cpu); return;
8650 default:
8651 HALT_NYI;
8652 }
8653
8654 case 0xFE: do_scalar_USHR (cpu); return;
8655
8656 case 0xBD:
8657 switch (INSTR (15, 10))
8658 {
8659 case 0x21: do_double_add (cpu); return;
8660 case 0x11: do_scalar_SSHL (cpu); return;
8661 default:
8662 HALT_NYI;
8663 }
8664
8665 default:
8666 HALT_NYI;
8667 }
8668 }
8669
8670 static void
8671 dexAdvSIMD1 (sim_cpu *cpu)
8672 {
8673 /* instr [28,25] = 1 111. */
8674
8675 /* We are currently only interested in the basic
8676 scalar fp routines which all have bit 30 = 0. */
8677 if (INSTR (30, 30))
8678 do_scalar_vec (cpu);
8679
8680 /* instr[24] is set for FP data processing 3-source and clear for
8681 all other basic scalar fp instruction groups. */
8682 else if (INSTR (24, 24))
8683 dexSimpleFPDataProc3Source (cpu);
8684
8685 /* instr[21] is clear for floating <-> fixed conversions and set for
8686 all other basic scalar fp instruction groups. */
8687 else if (!INSTR (21, 21))
8688 dexSimpleFPFixedConvert (cpu);
8689
8690 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
8691 11 ==> cond select, 00 ==> other. */
8692 else
8693 switch (INSTR (11, 10))
8694 {
8695 case 1: dexSimpleFPCondCompare (cpu); return;
8696 case 2: dexSimpleFPDataProc2Source (cpu); return;
8697 case 3: dexSimpleFPCondSelect (cpu); return;
8698
8699 default:
8700 /* Now an ordered cascade of tests.
8701 FP immediate has instr [12] == 1.
8702 FP compare has instr [13] == 1.
8703 FP Data Proc 1 Source has instr [14] == 1.
8704 FP floating <--> integer conversions has instr [15] == 0. */
8705 if (INSTR (12, 12))
8706 dexSimpleFPImmediate (cpu);
8707
8708 else if (INSTR (13, 13))
8709 dexSimpleFPCompare (cpu);
8710
8711 else if (INSTR (14, 14))
8712 dexSimpleFPDataProc1Source (cpu);
8713
8714 else if (!INSTR (15, 15))
8715 dexSimpleFPIntegerConvert (cpu);
8716
8717 else
8718 /* If we get here then instr[15] == 1 which means UNALLOC. */
8719 HALT_UNALLOC;
8720 }
8721 }
8722
8723 /* PC relative addressing. */
8724
8725 static void
8726 pcadr (sim_cpu *cpu)
8727 {
8728 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
8729 instr[30,29] = immlo
8730 instr[23,5] = immhi. */
8731 uint64_t address;
8732 unsigned rd = INSTR (4, 0);
8733 uint32_t isPage = INSTR (31, 31);
8734   union { int64_t s64; uint64_t u64; } imm;
8735 uint64_t offset;
8736
8737 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
8738 offset = imm.u64;
8739 offset = (offset << 2) | INSTR (30, 29);
8740
8741 address = aarch64_get_PC (cpu);
8742
8743 if (isPage)
8744 {
8745 offset <<= 12;
8746 address &= ~0xfff;
8747 }
8748
8749 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
8750 }
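
/* E.g. with the PC at 0x400654, ADR with an immediate of 1 gives
   0x400655, while ADRP with an immediate of 1 gives
   (0x400654 & ~0xfff) + (1 << 12) == 0x401000, the base of the
   next 4K page.  */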
8751
8752 /* Specific decode and execute for group Data Processing Immediate. */
8753
8754 static void
8755 dexPCRelAddressing (sim_cpu *cpu)
8756 {
8757 /* assert instr[28,24] = 10000. */
8758 pcadr (cpu);
8759 }
8760
8761 /* Immediate logical.
8762 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
8763 16, 32 or 64 bit sequence pulled out at decode and possibly
8764    inverting it.
8765
8766 N.B. the output register (dest) can normally be Xn or SP
8767 the exception occurs for flag setting instructions which may
8768 only use Xn for the output (dest). The input register can
8769 never be SP. */
8770
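/* As a worked example, an 8 bit element with its low six bits set
   (0x3f) replicates to the 64 bit mask 0x3f3f3f3f3f3f3f3f; decode may
   instead supply the inverted pattern 0xc0c0c0c0c0c0c0c0.  */
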
8771 /* 32 bit and immediate. */
8772 static void
8773 and32 (sim_cpu *cpu, uint32_t bimm)
8774 {
8775 unsigned rn = INSTR (9, 5);
8776 unsigned rd = INSTR (4, 0);
8777
8778 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8779 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
8780 }
8781
8782 /* 64 bit and immediate. */
8783 static void
8784 and64 (sim_cpu *cpu, uint64_t bimm)
8785 {
8786 unsigned rn = INSTR (9, 5);
8787 unsigned rd = INSTR (4, 0);
8788
8789 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8790 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
8791 }
8792
8793 /* 32 bit and immediate set flags. */
8794 static void
8795 ands32 (sim_cpu *cpu, uint32_t bimm)
8796 {
8797 unsigned rn = INSTR (9, 5);
8798 unsigned rd = INSTR (4, 0);
8799
8800 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8801 uint32_t value2 = bimm;
8802
8803 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8804 set_flags_for_binop32 (cpu, value1 & value2);
8805 }
8806
8807 /* 64 bit and immediate set flags. */
8808 static void
8809 ands64 (sim_cpu *cpu, uint64_t bimm)
8810 {
8811 unsigned rn = INSTR (9, 5);
8812 unsigned rd = INSTR (4, 0);
8813
8814 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8815 uint64_t value2 = bimm;
8816
8817 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8818 set_flags_for_binop64 (cpu, value1 & value2);
8819 }
8820
8821 /* 32 bit exclusive or immediate. */
8822 static void
8823 eor32 (sim_cpu *cpu, uint32_t bimm)
8824 {
8825 unsigned rn = INSTR (9, 5);
8826 unsigned rd = INSTR (4, 0);
8827
8828 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8829 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
8830 }
8831
8832 /* 64 bit exclusive or immediate. */
8833 static void
8834 eor64 (sim_cpu *cpu, uint64_t bimm)
8835 {
8836 unsigned rn = INSTR (9, 5);
8837 unsigned rd = INSTR (4, 0);
8838
8839 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8840 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
8841 }
8842
8843 /* 32 bit or immediate. */
8844 static void
8845 orr32 (sim_cpu *cpu, uint32_t bimm)
8846 {
8847 unsigned rn = INSTR (9, 5);
8848 unsigned rd = INSTR (4, 0);
8849
8850 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8851 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
8852 }
8853
8854 /* 64 bit or immediate. */
8855 static void
8856 orr64 (sim_cpu *cpu, uint64_t bimm)
8857 {
8858 unsigned rn = INSTR (9, 5);
8859 unsigned rd = INSTR (4, 0);
8860
8861 aarch64_set_reg_u64 (cpu, rd, SP_OK,
8862 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
8863 }
8864
8865 /* Logical shifted register.
8866 These allow an optional LSL, ASR, LSR or ROR to the second source
8867 register with a count up to the register bit count.
8868 N.B register args may not be SP. */
8869
8870 /* 32 bit AND shifted register. */
8871 static void
8872 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8873 {
8874 unsigned rm = INSTR (20, 16);
8875 unsigned rn = INSTR (9, 5);
8876 unsigned rd = INSTR (4, 0);
8877
8878 aarch64_set_reg_u64
8879 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8880 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8881 }
8882
8883 /* 64 bit AND shifted register. */
8884 static void
8885 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8886 {
8887 unsigned rm = INSTR (20, 16);
8888 unsigned rn = INSTR (9, 5);
8889 unsigned rd = INSTR (4, 0);
8890
8891 aarch64_set_reg_u64
8892 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8893 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8894 }
8895
8896 /* 32 bit AND shifted register setting flags. */
8897 static void
8898 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8899 {
8900 unsigned rm = INSTR (20, 16);
8901 unsigned rn = INSTR (9, 5);
8902 unsigned rd = INSTR (4, 0);
8903
8904 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8905 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
8906 shift, count);
8907
8908 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8909 set_flags_for_binop32 (cpu, value1 & value2);
8910 }
8911
8912 /* 64 bit AND shifted register setting flags. */
8913 static void
8914 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8915 {
8916 unsigned rm = INSTR (20, 16);
8917 unsigned rn = INSTR (9, 5);
8918 unsigned rd = INSTR (4, 0);
8919
8920 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8921 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
8922 shift, count);
8923
8924 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8925 set_flags_for_binop64 (cpu, value1 & value2);
8926 }
8927
8928 /* 32 bit BIC shifted register. */
8929 static void
8930 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8931 {
8932 unsigned rm = INSTR (20, 16);
8933 unsigned rn = INSTR (9, 5);
8934 unsigned rd = INSTR (4, 0);
8935
8936 aarch64_set_reg_u64
8937 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8938 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8939 }
8940
8941 /* 64 bit BIC shifted register. */
8942 static void
8943 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8944 {
8945 unsigned rm = INSTR (20, 16);
8946 unsigned rn = INSTR (9, 5);
8947 unsigned rd = INSTR (4, 0);
8948
8949 aarch64_set_reg_u64
8950 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
8951 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
8952 }
8953
8954 /* 32 bit BIC shifted register setting flags. */
8955 static void
8956 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8957 {
8958 unsigned rm = INSTR (20, 16);
8959 unsigned rn = INSTR (9, 5);
8960 unsigned rd = INSTR (4, 0);
8961
8962 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
8963 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
8964 shift, count);
8965
8966 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8967 set_flags_for_binop32 (cpu, value1 & value2);
8968 }
8969
8970 /* 64 bit BIC shifted register setting flags. */
8971 static void
8972 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8973 {
8974 unsigned rm = INSTR (20, 16);
8975 unsigned rn = INSTR (9, 5);
8976 unsigned rd = INSTR (4, 0);
8977
8978 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
8979 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
8980 shift, count);
8981
8982 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
8983 set_flags_for_binop64 (cpu, value1 & value2);
8984 }
8985
8986 /* 32 bit EON shifted register. */
8987 static void
8988 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
8989 {
8990 unsigned rm = INSTR (20, 16);
8991 unsigned rn = INSTR (9, 5);
8992 unsigned rd = INSTR (4, 0);
8993
8994 aarch64_set_reg_u64
8995 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
8996 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
8997 }
8998
8999 /* 64 bit EON shifted register. */
9000 static void
9001 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9002 {
9003 unsigned rm = INSTR (20, 16);
9004 unsigned rn = INSTR (9, 5);
9005 unsigned rd = INSTR (4, 0);
9006
9007 aarch64_set_reg_u64
9008 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9009 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9010 }
9011
9012 /* 32 bit EOR shifted register. */
9013 static void
9014 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9015 {
9016 unsigned rm = INSTR (20, 16);
9017 unsigned rn = INSTR (9, 5);
9018 unsigned rd = INSTR (4, 0);
9019
9020 aarch64_set_reg_u64
9021 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9022 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9023 }
9024
9025 /* 64 bit EOR shifted register. */
9026 static void
9027 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9028 {
9029 unsigned rm = INSTR (20, 16);
9030 unsigned rn = INSTR (9, 5);
9031 unsigned rd = INSTR (4, 0);
9032
9033 aarch64_set_reg_u64
9034 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9035 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9036 }
9037
9038 /* 32 bit ORR shifted register. */
9039 static void
9040 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9041 {
9042 unsigned rm = INSTR (20, 16);
9043 unsigned rn = INSTR (9, 5);
9044 unsigned rd = INSTR (4, 0);
9045
9046 aarch64_set_reg_u64
9047 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9048 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9049 }
9050
9051 /* 64 bit ORR shifted register. */
9052 static void
9053 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9054 {
9055 unsigned rm = INSTR (20, 16);
9056 unsigned rn = INSTR (9, 5);
9057 unsigned rd = INSTR (4, 0);
9058
9059 aarch64_set_reg_u64
9060 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9061 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9062 }
9063
9064 /* 32 bit ORN shifted register. */
9065 static void
9066 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9067 {
9068 unsigned rm = INSTR (20, 16);
9069 unsigned rn = INSTR (9, 5);
9070 unsigned rd = INSTR (4, 0);
9071
9072 aarch64_set_reg_u64
9073 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9074 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9075 }
9076
9077 /* 64 bit ORN shifted register. */
9078 static void
9079 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9080 {
9081 unsigned rm = INSTR (20, 16);
9082 unsigned rn = INSTR (9, 5);
9083 unsigned rd = INSTR (4, 0);
9084
9085 aarch64_set_reg_u64
9086 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9087 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9088 }
9089
9090 static void
9091 dexLogicalImmediate (sim_cpu *cpu)
9092 {
9093   /* assert instr[28,23] = 100100
9094 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9095 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9096 instr[22] = N : used to construct immediate mask
9097 instr[21,16] = immr
9098 instr[15,10] = imms
9099 instr[9,5] = Rn
9100 instr[4,0] = Rd */
9101
9102 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9103 uint32_t size = INSTR (31, 31);
9104 uint32_t N = INSTR (22, 22);
9105   /* uint32_t immr = INSTR (21, 16); */
9106   /* uint32_t imms = INSTR (15, 10); */
9107 uint32_t index = INSTR (22, 10);
9108 uint64_t bimm64 = LITable [index];
9109 uint32_t dispatch = INSTR (30, 29);
9110
9111 if (~size & N)
9112 HALT_UNALLOC;
9113
9114 if (!bimm64)
9115 HALT_UNALLOC;
9116
9117 if (size == 0)
9118 {
9119 uint32_t bimm = (uint32_t) bimm64;
9120
9121 switch (dispatch)
9122 {
9123 case 0: and32 (cpu, bimm); return;
9124 case 1: orr32 (cpu, bimm); return;
9125 case 2: eor32 (cpu, bimm); return;
9126 case 3: ands32 (cpu, bimm); return;
9127 }
9128 }
9129 else
9130 {
9131 switch (dispatch)
9132 {
9133 case 0: and64 (cpu, bimm64); return;
9134 case 1: orr64 (cpu, bimm64); return;
9135 case 2: eor64 (cpu, bimm64); return;
9136 case 3: ands64 (cpu, bimm64); return;
9137 }
9138 }
9139 HALT_UNALLOC;
9140 }
9141
9142 /* Immediate move.
9143 The uimm argument is a 16 bit value to be inserted into the
9144    target register; the pos argument locates the 16 bit word in the
9145    dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9146    3} for 64 bit.
9147    N.B. the register arg may not be SP so it should be
9148    accessed using the setGZRegisterXXX accessors.  */
9149
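/* A minimal sketch (illustrative only, not called by the decode paths
   below) of how a movz plus three movk steps compose a full 64 bit
   constant using the same pos * 16 placement as the helpers below.  */

static inline uint64_t
illustrate_movz_movk (uint16_t w0, uint16_t w1, uint16_t w2, uint16_t w3)
{
  uint64_t result = (uint64_t) w0;	/* movz: other shorts zeroed.  */

  /* movk: keep the remaining shorts, deposit 16 bits at pos 1..3.  */
  result = (result & ~(0xffffULL << 16)) | ((uint64_t) w1 << 16);
  result = (result & ~(0xffffULL << 32)) | ((uint64_t) w2 << 32);
  result = (result & ~(0xffffULL << 48)) | ((uint64_t) w3 << 48);
  return result;
}
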
9150 /* 32 bit move 16 bit immediate zero remaining shorts. */
9151 static void
9152 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9153 {
9154 unsigned rd = INSTR (4, 0);
9155
9156 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9157 }
9158
9159 /* 64 bit move 16 bit immediate zero remaining shorts. */
9160 static void
9161 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9162 {
9163 unsigned rd = INSTR (4, 0);
9164
9165 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9166 }
9167
9168 /* 32 bit move 16 bit immediate negated. */
9169 static void
9170 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9171 {
9172 unsigned rd = INSTR (4, 0);
9173
9174 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9175 }
9176
9177 /* 64 bit move 16 bit immediate negated. */
9178 static void
9179 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9180 {
9181 unsigned rd = INSTR (4, 0);
9182
9183 aarch64_set_reg_u64
9184 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9185 ^ 0xffffffffffffffffULL));
9186 }
9187
9188 /* 32 bit move 16 bit immediate keep remaining shorts. */
9189 static void
9190 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9191 {
9192 unsigned rd = INSTR (4, 0);
9193 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9194 uint32_t value = val << (pos * 16);
9195 uint32_t mask = ~(0xffffU << (pos * 16));
9196
9197 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9198 }
9199
9200 /* 64 bit move 16 bit immediate keep remaining shorts. */
9201 static void
9202 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9203 {
9204 unsigned rd = INSTR (4, 0);
9205 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9206 uint64_t value = (uint64_t) val << (pos * 16);
9207 uint64_t mask = ~(0xffffULL << (pos * 16));
9208
9209 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9210 }
9211
9212 static void
9213 dexMoveWideImmediate (sim_cpu *cpu)
9214 {
9215 /* assert instr[28:23] = 100101
9216 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9217 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9218 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9219 instr[20,5] = uimm16
9220 instr[4,0] = Rd */
9221
9222 /* N.B. the (multiple of 16) shift is applied by the called routine,
9223 we just pass the multiplier. */
9224
9225 uint32_t imm;
9226 uint32_t size = INSTR (31, 31);
9227 uint32_t op = INSTR (30, 29);
9228 uint32_t shift = INSTR (22, 21);
9229
9230   /* 32 bit operations can only use a shift of 0 or 1 lot of 16.
9231      Anything else is an unallocated instruction.  */
9232 if (size == 0 && (shift > 1))
9233 HALT_UNALLOC;
9234
9235 if (op == 1)
9236 HALT_UNALLOC;
9237
9238 imm = INSTR (20, 5);
9239
9240 if (size == 0)
9241 {
9242 if (op == 0)
9243 movn32 (cpu, imm, shift);
9244 else if (op == 2)
9245 movz32 (cpu, imm, shift);
9246 else
9247 movk32 (cpu, imm, shift);
9248 }
9249 else
9250 {
9251 if (op == 0)
9252 movn64 (cpu, imm, shift);
9253 else if (op == 2)
9254 movz64 (cpu, imm, shift);
9255 else
9256 movk64 (cpu, imm, shift);
9257 }
9258 }
9259
9260 /* Bitfield operations.
9261 These take a pair of bit positions r and s which are in {0..31}
9262 or {0..63} depending on the instruction word size.
9263 N.B register args may not be SP. */
9264
9265 /* OK, we start with ubfm which just needs to pick
9266    some bits out of the source, zero the rest, and write
9267 the result to dest. Just need two logical shifts. */
9268
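/* A minimal sketch of that two-shift idiom (illustrative only, not
   called by the handlers below), for the simple r <= s case on a
   32 bit word:  */

static inline uint32_t
illustrate_two_shift_extract (uint32_t value, uint32_t r, uint32_t s)
{
  value <<= 31 - s;      /* LSL bit s up to bit 31.  */
  value >>= 31 + r - s;  /* LSR back down, zeroing the other bits.  */
  return value;          /* Bits s:r of the input, at the bottom.  */
}
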
9269 /* 32 bit bitfield move, left and right of affected zeroed
9270 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9271 static void
9272 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9273 {
9274 unsigned rd;
9275 unsigned rn = INSTR (9, 5);
9276 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9277
9278 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9279 if (r <= s)
9280 {
9281 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9282 We want only bits s:xxx:r at the bottom of the word
9283 so we LSL bit s up to bit 31 i.e. by 31 - s
9284 and then we LSR to bring bit 31 down to bit s - r
9285 i.e. by 31 + r - s. */
9286 value <<= 31 - s;
9287 value >>= 31 + r - s;
9288 }
9289 else
9290 {
9291 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9292 	 We want only bits s:xxx:0 starting at bit 31-(r-1)
9293 so we LSL bit s up to bit 31 i.e. by 31 - s
9294 and then we LSL to bring bit 31 down to 31-(r-1)+s
9295 i.e. by r - (s + 1). */
9296 value <<= 31 - s;
9297 value >>= r - (s + 1);
9298 }
9299
9300 rd = INSTR (4, 0);
9301 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9302 }
9303
9304 /* 64 bit bitfield move, left and right of affected zeroed
9305 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9306 static void
9307 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9308 {
9309 unsigned rd;
9310 unsigned rn = INSTR (9, 5);
9311 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9312
9313 if (r <= s)
9314 {
9315 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9316 We want only bits s:xxx:r at the bottom of the word.
9317 So we LSL bit s up to bit 63 i.e. by 63 - s
9318 and then we LSR to bring bit 63 down to bit s - r
9319 i.e. by 63 + r - s. */
9320 value <<= 63 - s;
9321 value >>= 63 + r - s;
9322 }
9323 else
9324 {
9325 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9326 	 We want only bits s:xxx:0 starting at bit 63-(r-1).
9327 So we LSL bit s up to bit 63 i.e. by 63 - s
9328 and then we LSL to bring bit 63 down to 63-(r-1)+s
9329 i.e. by r - (s + 1). */
9330 value <<= 63 - s;
9331 value >>= r - (s + 1);
9332 }
9333
9334 rd = INSTR (4, 0);
9335 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9336 }
9337
9338 /* The signed versions need to insert sign bits
9339    on the left of the inserted bit field.  So we do
9340 much the same as the unsigned version except we
9341 use an arithmetic shift right -- this just means
9342 we need to operate on signed values. */
9343
9344 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9345 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9346 static void
9347 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9348 {
9349 unsigned rd;
9350 unsigned rn = INSTR (9, 5);
9351   /* As per ubfm32 but use an ASR instead of an LSR. */
9352 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9353
9354 if (r <= s)
9355 {
9356 value <<= 31 - s;
9357 value >>= 31 + r - s;
9358 }
9359 else
9360 {
9361 value <<= 31 - s;
9362 value >>= r - (s + 1);
9363 }
9364
9365 rd = INSTR (4, 0);
9366 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9367 }
9368
9369 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9370 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9371 static void
9372 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9373 {
9374 unsigned rd;
9375 unsigned rn = INSTR (9, 5);
9376   /* As per ubfm but use an ASR instead of an LSR. */
9377 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9378
9379 if (r <= s)
9380 {
9381 value <<= 63 - s;
9382 value >>= 63 + r - s;
9383 }
9384 else
9385 {
9386 value <<= 63 - s;
9387 value >>= r - (s + 1);
9388 }
9389
9390 rd = INSTR (4, 0);
9391 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9392 }
9393
9394 /* Finally, these versions leave non-affected bits
9395    as is.  So we need to generate the bits as per
9396 ubfm and also generate a mask to pick the
9397 bits from the original and computed values. */
9398
9399 /* 32 bit bitfield move, non-affected bits left as is.
9400 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9401 static void
9402 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9403 {
9404 unsigned rn = INSTR (9, 5);
9405 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9406 uint32_t mask = -1;
9407 unsigned rd;
9408 uint32_t value2;
9409
9410 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9411 if (r <= s)
9412 {
9413 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9414 We want only bits s:xxx:r at the bottom of the word
9415 so we LSL bit s up to bit 31 i.e. by 31 - s
9416 and then we LSR to bring bit 31 down to bit s - r
9417 i.e. by 31 + r - s. */
9418 value <<= 31 - s;
9419 value >>= 31 + r - s;
9420 /* the mask must include the same bits. */
9421 mask <<= 31 - s;
9422 mask >>= 31 + r - s;
9423 }
9424 else
9425 {
9426 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9427 	 We want only bits s:xxx:0 starting at bit 31-(r-1)
9428 so we LSL bit s up to bit 31 i.e. by 31 - s
9429 and then we LSL to bring bit 31 down to 31-(r-1)+s
9430 i.e. by r - (s + 1). */
9431 value <<= 31 - s;
9432 value >>= r - (s + 1);
9433 /* The mask must include the same bits. */
9434 mask <<= 31 - s;
9435 mask >>= r - (s + 1);
9436 }
9437
9438 rd = INSTR (4, 0);
9439 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9440
9441 value2 &= ~mask;
9442 value2 |= value;
9443
9444   aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
9446 }
9447
9448 /* 64 bit bitfield move, non-affected bits left as is.
9449 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9450 static void
9451 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9452 {
9453 unsigned rd;
9454 unsigned rn = INSTR (9, 5);
9455 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9456 uint64_t mask = 0xffffffffffffffffULL;
9457
9458 if (r <= s)
9459 {
9460 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9461 We want only bits s:xxx:r at the bottom of the word
9462 so we LSL bit s up to bit 63 i.e. by 63 - s
9463 and then we LSR to bring bit 63 down to bit s - r
9464 i.e. by 63 + r - s. */
9465 value <<= 63 - s;
9466 value >>= 63 + r - s;
9467 /* The mask must include the same bits. */
9468 mask <<= 63 - s;
9469 mask >>= 63 + r - s;
9470 }
9471 else
9472 {
9473 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9474 	 We want only bits s:xxx:0 starting at bit 63-(r-1)
9475 so we LSL bit s up to bit 63 i.e. by 63 - s
9476 and then we LSL to bring bit 63 down to 63-(r-1)+s
9477 i.e. by r - (s + 1). */
9478 value <<= 63 - s;
9479 value >>= r - (s + 1);
9480 /* The mask must include the same bits. */
9481 mask <<= 63 - s;
9482 mask >>= r - (s + 1);
9483 }
9484
9485 rd = INSTR (4, 0);
9486 aarch64_set_reg_u64
9487 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9488 }
9489
9490 static void
9491 dexBitfieldImmediate (sim_cpu *cpu)
9492 {
9493 /* assert instr[28:23] = 100110
9494 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9495 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9496 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9497 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9498 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9499 instr[9,5] = Rn
9500 instr[4,0] = Rd */
9501
9502 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9503 uint32_t dispatch;
9504 uint32_t imms;
9505 uint32_t size = INSTR (31, 31);
9506 uint32_t N = INSTR (22, 22);
9507 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
9508 /* or else we have an UNALLOC. */
9509 uint32_t immr = INSTR (21, 16);
9510
9511 if (~size & N)
9512 HALT_UNALLOC;
9513
9514 if (!size && uimm (immr, 5, 5))
9515 HALT_UNALLOC;
9516
9517 imms = INSTR (15, 10);
9518 if (!size && uimm (imms, 5, 5))
9519 HALT_UNALLOC;
9520
9521 /* Switch on combined size and op. */
9522 dispatch = INSTR (31, 29);
9523 switch (dispatch)
9524 {
9525 case 0: sbfm32 (cpu, immr, imms); return;
9526 case 1: bfm32 (cpu, immr, imms); return;
9527 case 2: ubfm32 (cpu, immr, imms); return;
9528 case 4: sbfm (cpu, immr, imms); return;
9529 case 5: bfm (cpu, immr, imms); return;
9530 case 6: ubfm (cpu, immr, imms); return;
9531 default: HALT_UNALLOC;
9532 }
9533 }
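
/* N.B. the familiar shift and extend mnemonics are aliases of these
   primitives: e.g. LSR Wd, Wn, #n is UBFM Wd, Wn, #n, #31, and
   SXTB Xd, Wn is SBFM Xd, Xn, #0, #7.  */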
9534
9535 static void
9536 do_EXTR_32 (sim_cpu *cpu)
9537 {
9538 /* instr[31:21] = 00010011100
9539 instr[20,16] = Rm
9540 instr[15,10] = imms : 0xxxxx for 32 bit
9541 instr[9,5] = Rn
9542 instr[4,0] = Rd */
9543 unsigned rm = INSTR (20, 16);
9544 unsigned imms = INSTR (15, 10) & 31;
9545 unsigned rn = INSTR ( 9, 5);
9546 unsigned rd = INSTR ( 4, 0);
9547 uint64_t val1;
9548 uint64_t val2;
9549
9550 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
9551 val1 >>= imms;
9552 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9553 val2 <<= (32 - imms);
9554
9555   /* Mask the result so that the top half of Rd is left zero.  */
9555   aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
9556 }
9557
9558 static void
9559 do_EXTR_64 (sim_cpu *cpu)
9560 {
9561 /* instr[31:21] = 10010011100
9562 instr[20,16] = Rm
9563 instr[15,10] = imms
9564 instr[9,5] = Rn
9565 instr[4,0] = Rd */
9566 unsigned rm = INSTR (20, 16);
9567 unsigned imms = INSTR (15, 10) & 63;
9568 unsigned rn = INSTR ( 9, 5);
9569 unsigned rd = INSTR ( 4, 0);
9570 uint64_t val;
9571
9572 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
9573   val >>= imms;
9574   if (imms > 0) /* A 64 bit shift by 64 would be undefined.  */
9574     val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
9575
9576 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
9577 }
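
/* E.g. EXTR X0, X1, X2, #8 with X1 = 0x1111111111111111 and
   X2 = 0x2222222222222222 extracts 64 bits starting at bit 8 of the
   concatenation X1:X2, giving X0 = 0x1122222222222222.  */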
9578
9579 static void
9580 dexExtractImmediate (sim_cpu *cpu)
9581 {
9582 /* assert instr[28:23] = 100111
9583 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9584 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
9585 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
9586 instr[21] = op0 : must be 0 or UNALLOC
9587 instr[20,16] = Rm
9588 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9589 instr[9,5] = Rn
9590 instr[4,0] = Rd */
9591
9592 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9593 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
9594 uint32_t dispatch;
9595 uint32_t size = INSTR (31, 31);
9596 uint32_t N = INSTR (22, 22);
9597 /* 32 bit operations must have imms[5] = 0
9598 or else we have an UNALLOC. */
9599 uint32_t imms = INSTR (15, 10);
9600
9601 if (size ^ N)
9602 HALT_UNALLOC;
9603
9604 if (!size && uimm (imms, 5, 5))
9605 HALT_UNALLOC;
9606
9607 /* Switch on combined size and op. */
9608 dispatch = INSTR (31, 29);
9609
9610 if (dispatch == 0)
9611 do_EXTR_32 (cpu);
9612
9613 else if (dispatch == 4)
9614 do_EXTR_64 (cpu);
9615
9616 else if (dispatch == 1)
9617 HALT_NYI;
9618 else
9619 HALT_UNALLOC;
9620 }
9621
9622 static void
9623 dexDPImm (sim_cpu *cpu)
9624 {
9625 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
9626      assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
9627 bits [25,23] of a DPImm are the secondary dispatch vector. */
9628 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
9629
9630 switch (group2)
9631 {
9632 case DPIMM_PCADR_000:
9633 case DPIMM_PCADR_001:
9634 dexPCRelAddressing (cpu);
9635 return;
9636
9637 case DPIMM_ADDSUB_010:
9638 case DPIMM_ADDSUB_011:
9639 dexAddSubtractImmediate (cpu);
9640 return;
9641
9642 case DPIMM_LOG_100:
9643 dexLogicalImmediate (cpu);
9644 return;
9645
9646 case DPIMM_MOV_101:
9647 dexMoveWideImmediate (cpu);
9648 return;
9649
9650 case DPIMM_BITF_110:
9651 dexBitfieldImmediate (cpu);
9652 return;
9653
9654 case DPIMM_EXTR_111:
9655 dexExtractImmediate (cpu);
9656 return;
9657
9658 default:
9659 /* Should never reach here. */
9660 HALT_NYI;
9661 }
9662 }
9663
9664 static void
9665 dexLoadUnscaledImmediate (sim_cpu *cpu)
9666 {
9667 /* instr[29,24] == 111_00
9668 instr[21] == 0
9669 instr[11,10] == 00
9670 instr[31,30] = size
9671 instr[26] = V
9672 instr[23,22] = opc
9673 instr[20,12] = simm9
9674 instr[9,5] = rn may be SP. */
9675 /* unsigned rt = INSTR (4, 0); */
9676 uint32_t V = INSTR (26, 26);
9677 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
9678 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
9679
9680 if (!V)
9681 {
9682 /* GReg operations. */
9683 switch (dispatch)
9684 {
9685 case 0: sturb (cpu, imm); return;
9686 case 1: ldurb32 (cpu, imm); return;
9687 case 2: ldursb64 (cpu, imm); return;
9688 case 3: ldursb32 (cpu, imm); return;
9689 case 4: sturh (cpu, imm); return;
9690 case 5: ldurh32 (cpu, imm); return;
9691 case 6: ldursh64 (cpu, imm); return;
9692 case 7: ldursh32 (cpu, imm); return;
9693 case 8: stur32 (cpu, imm); return;
9694 case 9: ldur32 (cpu, imm); return;
9695 case 10: ldursw (cpu, imm); return;
9696 case 12: stur64 (cpu, imm); return;
9697 case 13: ldur64 (cpu, imm); return;
9698
9699 case 14:
9700 /* PRFUM NYI. */
9701 HALT_NYI;
9702
9703 default:
9704 case 11:
9705 case 15:
9706 HALT_UNALLOC;
9707 }
9708 }
9709
9710 /* FReg operations. */
9711 switch (dispatch)
9712 {
9713 case 2: fsturq (cpu, imm); return;
9714 case 3: fldurq (cpu, imm); return;
9715 case 8: fsturs (cpu, imm); return;
9716 case 9: fldurs (cpu, imm); return;
9717 case 12: fsturd (cpu, imm); return;
9718 case 13: fldurd (cpu, imm); return;
9719
9720 case 0: /* STUR 8 bit FP. */
9721 case 1: /* LDUR 8 bit FP. */
9722 case 4: /* STUR 16 bit FP. */
9723     case 5: /* LDUR 16 bit FP. */
9724 HALT_NYI;
9725
9726 default:
9727 case 6:
9728 case 7:
9729 case 10:
9730 case 11:
9731 case 14:
9732 case 15:
9733 HALT_UNALLOC;
9734 }
9735 }
9736
9737 /* N.B. A preliminary note regarding all the ldrs<x>32
9738 instructions
9739
9740 The signed value loaded by these instructions is cast to unsigned
9741    before being passed to aarch64_set_reg_u64, i.e. before being
9742    assigned to the 64 bit element of the GReg union.  This performs a
9743    32 bit sign extension (as required) but avoids 64 bit sign
9744    extension, thus ensuring that the top half of the register word is
9745    zero.  This is what the spec demands when a 32 bit load occurs. */
9746
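/* A small sketch of that idiom (illustrative only, not used by the
   handlers below):  */

static inline uint64_t
illustrate_32bit_sign_extend (int8_t byte)
{
  /* Casting to uint32_t sign extends to 32 bits; widening the result
     to 64 bits then zero extends, leaving the top half clear.  */
  return (uint32_t) byte;
}
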
9747 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
9748 static void
9749 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
9750 {
9751 unsigned int rn = INSTR (9, 5);
9752 unsigned int rt = INSTR (4, 0);
9753
9754   /* The target register may not be SP but the source may be;
9755      there is no scaling required for a byte load.  */
9756 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
9757 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9758 		       (uint32_t) aarch64_get_mem_s8 (cpu, address));
9759 }
9760
9761 /* 32 bit load sign-extended byte scaled or unscaled zero-
9762 or sign-extended 32-bit register offset. */
9763 static void
9764 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9765 {
9766 unsigned int rm = INSTR (20, 16);
9767 unsigned int rn = INSTR (9, 5);
9768 unsigned int rt = INSTR (4, 0);
9769
9770 /* rn may reference SP, rm and rt must reference ZR. */
9771
9772 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9773 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9774 extension);
9775
9776 /* There is no scaling required for a byte load. */
9777 aarch64_set_reg_u64
9778     (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
9779 						      + displacement));
9780 }
9781
9782 /* 32 bit load sign-extended byte unscaled signed 9 bit with
9783 pre- or post-writeback. */
9784 static void
9785 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9786 {
9787 uint64_t address;
9788 unsigned int rn = INSTR (9, 5);
9789 unsigned int rt = INSTR (4, 0);
9790
9791 if (rn == rt && wb != NoWriteBack)
9792 HALT_UNALLOC;
9793
9794 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9795
9796 if (wb == Pre)
9797 address += offset;
9798
9799 aarch64_set_reg_u64 (cpu, rt, NO_SP,
9800 		       (uint32_t) aarch64_get_mem_s8 (cpu, address));
9801
9802 if (wb == Post)
9803 address += offset;
9804
9805 if (wb != NoWriteBack)
9806 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
9807 }
9808
9809 /* 8 bit store scaled. */
9810 static void
9811 fstrb_abs (sim_cpu *cpu, uint32_t offset)
9812 {
9813 unsigned st = INSTR (4, 0);
9814 unsigned rn = INSTR (9, 5);
9815
9816 aarch64_set_mem_u8 (cpu,
9817 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
9818 aarch64_get_vec_u8 (cpu, st, 0));
9819 }
9820
9821 /* 8 bit store scaled or unscaled zero- or
9822 sign-extended 8-bit register offset. */
9823 static void
9824 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9825 {
9826 unsigned rm = INSTR (20, 16);
9827 unsigned rn = INSTR (9, 5);
9828 unsigned st = INSTR (4, 0);
9829
9830 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9831 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9832 extension);
9833   uint64_t displacement = extended;	/* Byte accesses are never scaled.  */
9834
9835 aarch64_set_mem_u8
9836 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
9837 }
9838
9839 /* 16 bit store scaled. */
9840 static void
9841 fstrh_abs (sim_cpu *cpu, uint32_t offset)
9842 {
9843 unsigned st = INSTR (4, 0);
9844 unsigned rn = INSTR (9, 5);
9845
9846 aarch64_set_mem_u16
9847 (cpu,
9848 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
9849 aarch64_get_vec_u16 (cpu, st, 0));
9850 }
9851
9852 /* 16 bit store scaled or unscaled zero-
9853 or sign-extended 16-bit register offset. */
9854 static void
9855 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9856 {
9857 unsigned rm = INSTR (20, 16);
9858 unsigned rn = INSTR (9, 5);
9859 unsigned st = INSTR (4, 0);
9860
9861 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9862 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9863 extension);
9864 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
9865
9866 aarch64_set_mem_u16
9867 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
9868 }
9869
9870 /* 32 bit store scaled unsigned 12 bit. */
9871 static void
9872 fstrs_abs (sim_cpu *cpu, uint32_t offset)
9873 {
9874 unsigned st = INSTR (4, 0);
9875 unsigned rn = INSTR (9, 5);
9876
9877 aarch64_set_mem_u32
9878 (cpu,
9879 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
9880 aarch64_get_vec_u32 (cpu, st, 0));
9881 }
9882
9883 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
9884 static void
9885 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9886 {
9887 unsigned rn = INSTR (9, 5);
9888 unsigned st = INSTR (4, 0);
9889
9890 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9891
9892 if (wb != Post)
9893 address += offset;
9894
9895 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
9896
9897 if (wb == Post)
9898 address += offset;
9899
9900 if (wb != NoWriteBack)
9901 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9902 }
9903
9904 /* 32 bit store scaled or unscaled zero-
9905 or sign-extended 32-bit register offset. */
9906 static void
9907 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9908 {
9909 unsigned rm = INSTR (20, 16);
9910 unsigned rn = INSTR (9, 5);
9911 unsigned st = INSTR (4, 0);
9912
9913 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9914 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9915 extension);
9916 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
9917
9918 aarch64_set_mem_u32
9919 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
9920 }
9921
9922 /* 64 bit store scaled unsigned 12 bit. */
9923 static void
9924 fstrd_abs (sim_cpu *cpu, uint32_t offset)
9925 {
9926 unsigned st = INSTR (4, 0);
9927 unsigned rn = INSTR (9, 5);
9928
9929 aarch64_set_mem_u64
9930 (cpu,
9931 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
9932 aarch64_get_vec_u64 (cpu, st, 0));
9933 }
9934
9935 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
9936 static void
9937 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9938 {
9939 unsigned rn = INSTR (9, 5);
9940 unsigned st = INSTR (4, 0);
9941
9942 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9943
9944 if (wb != Post)
9945 address += offset;
9946
9947 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
9948
9949 if (wb == Post)
9950 address += offset;
9951
9952 if (wb != NoWriteBack)
9953 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
9954 }
9955
9956 /* 64 bit store scaled or unscaled zero-
9957 or sign-extended 32-bit register offset. */
9958 static void
9959 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
9960 {
9961 unsigned rm = INSTR (20, 16);
9962 unsigned rn = INSTR (9, 5);
9963 unsigned st = INSTR (4, 0);
9964
9965 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9966 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9967 extension);
9968 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
9969
9970 aarch64_set_mem_u64
9971 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
9972 }
9973
9974 /* 128 bit store scaled unsigned 12 bit. */
9975 static void
9976 fstrq_abs (sim_cpu *cpu, uint32_t offset)
9977 {
9978 FRegister a;
9979 unsigned st = INSTR (4, 0);
9980 unsigned rn = INSTR (9, 5);
9981 uint64_t addr;
9982
9983 aarch64_get_FP_long_double (cpu, st, & a);
9984
9985 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
9986 aarch64_set_mem_long_double (cpu, addr, a);
9987 }
9988
9989 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
9990 static void
9991 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
9992 {
9993 FRegister a;
9994 unsigned rn = INSTR (9, 5);
9995 unsigned st = INSTR (4, 0);
9996 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
9997
9998 if (wb != Post)
9999 address += offset;
10000
10001 aarch64_get_FP_long_double (cpu, st, & a);
10002 aarch64_set_mem_long_double (cpu, address, a);
10003
10004 if (wb == Post)
10005 address += offset;
10006
10007 if (wb != NoWriteBack)
10008 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10009 }
10010
10011 /* 128 bit store scaled or unscaled zero-
10012 or sign-extended 32-bit register offset. */
10013 static void
10014 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10015 {
10016 unsigned rm = INSTR (20, 16);
10017 unsigned rn = INSTR (9, 5);
10018 unsigned st = INSTR (4, 0);
10019
10020 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10021 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10022 extension);
10023 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10024
10025 FRegister a;
10026
10027 aarch64_get_FP_long_double (cpu, st, & a);
10028 aarch64_set_mem_long_double (cpu, address + displacement, a);
10029 }
10030
10031 static void
10032 dexLoadImmediatePrePost (sim_cpu *cpu)
10033 {
10034 /* instr[31,30] = size
10035 instr[29,27] = 111
10036 instr[26] = V
10037 instr[25,24] = 00
10038 instr[23,22] = opc
10039 instr[21] = 0
10040 instr[20,12] = simm9
10041 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10042 instr[10] = 0
10043 instr[9,5] = Rn may be SP.
10044 instr[4,0] = Rt */
10045
10046 uint32_t V = INSTR (26, 26);
10047 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10048 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10049 WriteBack wb = INSTR (11, 11);
10050
10051 if (!V)
10052 {
10053 /* GReg operations. */
10054 switch (dispatch)
10055 {
10056 case 0: strb_wb (cpu, imm, wb); return;
10057 case 1: ldrb32_wb (cpu, imm, wb); return;
10058 case 2: ldrsb_wb (cpu, imm, wb); return;
10059 case 3: ldrsb32_wb (cpu, imm, wb); return;
10060 case 4: strh_wb (cpu, imm, wb); return;
10061 case 5: ldrh32_wb (cpu, imm, wb); return;
10062 case 6: ldrsh64_wb (cpu, imm, wb); return;
10063 case 7: ldrsh32_wb (cpu, imm, wb); return;
10064 case 8: str32_wb (cpu, imm, wb); return;
10065 case 9: ldr32_wb (cpu, imm, wb); return;
10066 case 10: ldrsw_wb (cpu, imm, wb); return;
10067 case 12: str_wb (cpu, imm, wb); return;
10068 case 13: ldr_wb (cpu, imm, wb); return;
10069
10070 default:
10071 case 11:
10072 case 14:
10073 case 15:
10074 HALT_UNALLOC;
10075 }
10076 }
10077
10078 /* FReg operations. */
10079 switch (dispatch)
10080 {
10081 case 2: fstrq_wb (cpu, imm, wb); return;
10082 case 3: fldrq_wb (cpu, imm, wb); return;
10083 case 8: fstrs_wb (cpu, imm, wb); return;
10084 case 9: fldrs_wb (cpu, imm, wb); return;
10085 case 12: fstrd_wb (cpu, imm, wb); return;
10086 case 13: fldrd_wb (cpu, imm, wb); return;
10087
10088 case 0: /* STUR 8 bit FP. */
10089 case 1: /* LDUR 8 bit FP. */
10090 case 4: /* STUR 16 bit FP. */
10091     case 5: /* LDUR 16 bit FP. */
10092 HALT_NYI;
10093
10094 default:
10095 case 6:
10096 case 7:
10097 case 10:
10098 case 11:
10099 case 14:
10100 case 15:
10101 HALT_UNALLOC;
10102 }
10103 }
10104
10105 static void
10106 dexLoadRegisterOffset (sim_cpu *cpu)
10107 {
10108 /* instr[31,30] = size
10109 instr[29,27] = 111
10110 instr[26] = V
10111 instr[25,24] = 00
10112 instr[23,22] = opc
10113 instr[21] = 1
10114 instr[20,16] = rm
10115 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10116 110 ==> SXTW, 111 ==> SXTX,
10117 ow ==> RESERVED
10118 instr[12] = scaled
10119 instr[11,10] = 10
10120 instr[9,5] = rn
10121 instr[4,0] = rt. */
10122
10123 uint32_t V = INSTR (26, 26);
10124 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10125 Scaling scale = INSTR (12, 12);
10126 Extension extensionType = INSTR (15, 13);
10127
10128 /* Check for illegal extension types. */
10129 if (uimm (extensionType, 1, 1) == 0)
10130 HALT_UNALLOC;
10131
10132 if (extensionType == UXTX || extensionType == SXTX)
10133 extensionType = NoExtension;
10134
10135 if (!V)
10136 {
10137 /* GReg operations. */
10138 switch (dispatch)
10139 {
10140 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10141 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10142 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10143 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10144 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10145 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10146 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10147 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10148 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10149 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10150 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10151 case 12: str_scale_ext (cpu, scale, extensionType); return;
10152 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10153 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10154
10155 default:
10156 case 11:
10157 case 15:
10158 HALT_UNALLOC;
10159 }
10160 }
10161
10162 /* FReg operations. */
10163 switch (dispatch)
10164 {
10165 case 1: /* LDUR 8 bit FP. */
10166 HALT_NYI;
10167 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10168     case 5: /* LDUR 16 bit FP. */
10169 HALT_NYI;
10170 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10171 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10172
10173 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10174 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10175 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10176 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10177 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10178
10179 default:
10180 case 6:
10181 case 7:
10182 case 10:
10183 case 11:
10184 case 14:
10185 case 15:
10186 HALT_UNALLOC;
10187 }
10188 }
10189
10190 static void
10191 dexLoadUnsignedImmediate (sim_cpu *cpu)
10192 {
10193 /* instr[29,24] == 111_01
10194 instr[31,30] = size
10195 instr[26] = V
10196 instr[23,22] = opc
10197 instr[21,10] = uimm12 : unsigned immediate offset
10198 instr[9,5] = rn may be SP.
10199 instr[4,0] = rt. */
10200
10201 uint32_t V = INSTR (26,26);
10202 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10203 uint32_t imm = INSTR (21, 10);
10204
10205 if (!V)
10206 {
10207 /* GReg operations. */
10208 switch (dispatch)
10209 {
10210 case 0: strb_abs (cpu, imm); return;
10211 case 1: ldrb32_abs (cpu, imm); return;
10212 case 2: ldrsb_abs (cpu, imm); return;
10213 case 3: ldrsb32_abs (cpu, imm); return;
10214 case 4: strh_abs (cpu, imm); return;
10215 case 5: ldrh32_abs (cpu, imm); return;
10216 case 6: ldrsh_abs (cpu, imm); return;
10217 case 7: ldrsh32_abs (cpu, imm); return;
10218 case 8: str32_abs (cpu, imm); return;
10219 case 9: ldr32_abs (cpu, imm); return;
10220 case 10: ldrsw_abs (cpu, imm); return;
10221 case 12: str_abs (cpu, imm); return;
10222 case 13: ldr_abs (cpu, imm); return;
10223 case 14: prfm_abs (cpu, imm); return;
10224
10225 default:
10226 case 11:
10227 case 15:
10228 HALT_UNALLOC;
10229 }
10230 }
10231
10232 /* FReg operations. */
10233 switch (dispatch)
10234 {
10235 case 0: fstrb_abs (cpu, imm); return;
10236 case 4: fstrh_abs (cpu, imm); return;
10237 case 8: fstrs_abs (cpu, imm); return;
10238 case 12: fstrd_abs (cpu, imm); return;
10239 case 2: fstrq_abs (cpu, imm); return;
10240
10241 case 1: fldrb_abs (cpu, imm); return;
10242 case 5: fldrh_abs (cpu, imm); return;
10243 case 9: fldrs_abs (cpu, imm); return;
10244 case 13: fldrd_abs (cpu, imm); return;
10245 case 3: fldrq_abs (cpu, imm); return;
10246
10247 default:
10248 case 6:
10249 case 7:
10250 case 10:
10251 case 11:
10252 case 14:
10253 case 15:
10254 HALT_UNALLOC;
10255 }
10256 }
10257
10258 static void
10259 dexLoadExclusive (sim_cpu *cpu)
10260 {
10261 /* assert instr[29:24] = 001000;
10262 instr[31,30] = size
10263 instr[23] = 0 if exclusive
10264 instr[22] = L : 1 if load, 0 if store
10265 instr[21] = 1 if pair
10266 instr[20,16] = Rs
10267 instr[15] = o0 : 1 if ordered
10268 instr[14,10] = Rt2
10269 instr[9,5] = Rn
10270      instr[4,0] = Rt.  */
10271
10272 switch (INSTR (22, 21))
10273 {
10274 case 2: ldxr (cpu); return;
10275 case 0: stxr (cpu); return;
10276 default: HALT_NYI;
10277 }
10278 }
10279
10280 static void
10281 dexLoadOther (sim_cpu *cpu)
10282 {
10283 uint32_t dispatch;
10284
10285 /* instr[29,25] = 111_0
10286 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10287      instr[21] and instr[11,10] form the secondary dispatch vector.  */
10288 if (INSTR (24, 24))
10289 {
10290 dexLoadUnsignedImmediate (cpu);
10291 return;
10292 }
10293
10294 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10295 switch (dispatch)
10296 {
10297 case 0: dexLoadUnscaledImmediate (cpu); return;
10298 case 1: dexLoadImmediatePrePost (cpu); return;
10299 case 3: dexLoadImmediatePrePost (cpu); return;
10300 case 6: dexLoadRegisterOffset (cpu); return;
10301
10302 default:
10303 case 2:
10304 case 4:
10305 case 5:
10306 case 7:
10307 HALT_NYI;
10308 }
10309 }
10310
10311 static void
10312 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10313 {
10314 unsigned rn = INSTR (14, 10);
10315 unsigned rd = INSTR (9, 5);
10316 unsigned rm = INSTR (4, 0);
10317 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10318
10319 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10320     HALT_UNALLOC; /* Unpredictable: writeback base overlaps a data register.  */
10321
10322 offset <<= 2;
10323
10324 if (wb != Post)
10325 address += offset;
10326
10327 aarch64_set_mem_u32 (cpu, address,
10328 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10329 aarch64_set_mem_u32 (cpu, address + 4,
10330 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10331
10332 if (wb == Post)
10333 address += offset;
10334
10335 if (wb != NoWriteBack)
10336 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10337 }
10338
10339 static void
10340 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10341 {
10342 unsigned rn = INSTR (14, 10);
10343 unsigned rd = INSTR (9, 5);
10344 unsigned rm = INSTR (4, 0);
10345 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10346
10347 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10348     HALT_UNALLOC; /* Unpredictable: writeback base overlaps a data register.  */
10349
10350 offset <<= 3;
10351
10352 if (wb != Post)
10353 address += offset;
10354
10355 aarch64_set_mem_u64 (cpu, address,
10356 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10357 aarch64_set_mem_u64 (cpu, address + 8,
10358 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10359
10360 if (wb == Post)
10361 address += offset;
10362
10363 if (wb != NoWriteBack)
10364 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10365 }
10366
10367 static void
10368 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10369 {
10370 unsigned rn = INSTR (14, 10);
10371 unsigned rd = INSTR (9, 5);
10372 unsigned rm = INSTR (4, 0);
10373 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10374
10375   /* A load pair with Rt == Rt2 is unpredictable; treat it as UNALLOC.  */
10376 if (rn == rm)
10377 HALT_UNALLOC;
10378
10379 offset <<= 2;
10380
10381 if (wb != Post)
10382 address += offset;
10383
10384 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10385 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10386
10387 if (wb == Post)
10388 address += offset;
10389
10390 if (wb != NoWriteBack)
10391 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10392 }
10393
10394 static void
10395 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10396 {
10397 unsigned rn = INSTR (14, 10);
10398 unsigned rd = INSTR (9, 5);
10399 unsigned rm = INSTR (4, 0);
10400 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10401
10402   /* A load pair with Rt == Rt2 is unpredictable; treat it as UNALLOC.  */
10403 if (rn == rm)
10404 HALT_UNALLOC;
10405
10406 offset <<= 2;
10407
10408 if (wb != Post)
10409 address += offset;
10410
10411 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10412 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10413
10414 if (wb == Post)
10415 address += offset;
10416
10417 if (wb != NoWriteBack)
10418 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10419 }
10420
10421 static void
10422 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10423 {
10424 unsigned rn = INSTR (14, 10);
10425 unsigned rd = INSTR (9, 5);
10426 unsigned rm = INSTR (4, 0);
10427 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10428
10429   /* A load pair with Rt == Rt2 is unpredictable; treat it as UNALLOC.  */
10430 if (rn == rm)
10431 HALT_UNALLOC;
10432
10433 offset <<= 3;
10434
10435 if (wb != Post)
10436 address += offset;
10437
10438 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10439 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10440
10441 if (wb == Post)
10442 address += offset;
10443
10444 if (wb != NoWriteBack)
10445 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10446 }
10447
10448 static void
10449 dex_load_store_pair_gr (sim_cpu *cpu)
10450 {
10451 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10452 instr[29,25] = instruction encoding: 101_0
10453 instr[26] = V : 1 if fp 0 if gp
10454 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10455 instr[22] = load/store (1=> load)
10456 instr[21,15] = signed, scaled, offset
10457 instr[14,10] = Rn
10458 instr[ 9, 5] = Rd
10459 instr[ 4, 0] = Rm. */
10460
10461 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10462 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10463
10464 switch (dispatch)
10465 {
10466 case 2: store_pair_u32 (cpu, offset, Post); return;
10467 case 3: load_pair_u32 (cpu, offset, Post); return;
10468 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10469 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10470 case 6: store_pair_u32 (cpu, offset, Pre); return;
10471 case 7: load_pair_u32 (cpu, offset, Pre); return;
10472
10473 case 11: load_pair_s32 (cpu, offset, Post); return;
10474 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
10475 case 15: load_pair_s32 (cpu, offset, Pre); return;
10476
10477 case 18: store_pair_u64 (cpu, offset, Post); return;
10478 case 19: load_pair_u64 (cpu, offset, Post); return;
10479 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10480 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
10481 case 22: store_pair_u64 (cpu, offset, Pre); return;
10482 case 23: load_pair_u64 (cpu, offset, Pre); return;
10483
10484 default:
10485 HALT_UNALLOC;
10486 }
10487 }
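
/* E.g. STP X1, X2, [SP, #-16]! encodes size 10, pre-index addressing
   and L = 0, i.e. dispatch 22 above: store_pair_u64 with Pre
   writeback and a raw offset of -2, scaled by 8 to -16.  */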
10488
10489 static void
10490 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10491 {
10492 unsigned rn = INSTR (14, 10);
10493 unsigned rd = INSTR (9, 5);
10494 unsigned rm = INSTR (4, 0);
10495 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10496
10497 offset <<= 2;
10498
10499 if (wb != Post)
10500 address += offset;
10501
10502 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
10503 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10504
10505 if (wb == Post)
10506 address += offset;
10507
10508 if (wb != NoWriteBack)
10509 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10510 }
10511
10512 static void
10513 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10514 {
10515 unsigned rn = INSTR (14, 10);
10516 unsigned rd = INSTR (9, 5);
10517 unsigned rm = INSTR (4, 0);
10518 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10519
10520 offset <<= 3;
10521
10522 if (wb != Post)
10523 address += offset;
10524
10525 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
10526 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10527
10528 if (wb == Post)
10529 address += offset;
10530
10531 if (wb != NoWriteBack)
10532 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10533 }
10534
10535 static void
10536 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10537 {
10538 FRegister a;
10539 unsigned rn = INSTR (14, 10);
10540 unsigned rd = INSTR (9, 5);
10541 unsigned rm = INSTR (4, 0);
10542 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10543
10544 offset <<= 4;
10545
10546 if (wb != Post)
10547 address += offset;
10548
10549 aarch64_get_FP_long_double (cpu, rm, & a);
10550 aarch64_set_mem_long_double (cpu, address, a);
10551 aarch64_get_FP_long_double (cpu, rn, & a);
10552 aarch64_set_mem_long_double (cpu, address + 16, a);
10553
10554 if (wb == Post)
10555 address += offset;
10556
10557 if (wb != NoWriteBack)
10558 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10559 }
10560
10561 static void
10562 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10563 {
10564 unsigned rn = INSTR (14, 10);
10565 unsigned rd = INSTR (9, 5);
10566 unsigned rm = INSTR (4, 0);
10567 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10568
10569 if (rm == rn)
10570 HALT_UNALLOC;
10571
10572 offset <<= 2;
10573
10574 if (wb != Post)
10575 address += offset;
10576
10577 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
10578 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
10579
10580 if (wb == Post)
10581 address += offset;
10582
10583 if (wb != NoWriteBack)
10584 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10585 }
10586
10587 static void
10588 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10589 {
10590 unsigned rn = INSTR (14, 10);
10591 unsigned rd = INSTR (9, 5);
10592 unsigned rm = INSTR (4, 0);
10593 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10594
10595 if (rm == rn)
10596 HALT_UNALLOC;
10597
10598 offset <<= 3;
10599
10600 if (wb != Post)
10601 address += offset;
10602
10603 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
10604 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
10605
10606 if (wb == Post)
10607 address += offset;
10608
10609 if (wb != NoWriteBack)
10610 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10611 }
10612
10613 static void
10614 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10615 {
10616 FRegister a;
10617 unsigned rn = INSTR (14, 10);
10618 unsigned rd = INSTR (9, 5);
10619 unsigned rm = INSTR (4, 0);
10620 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10621
10622 if (rm == rn)
10623 HALT_UNALLOC;
10624
10625 offset <<= 4;
10626
10627 if (wb != Post)
10628 address += offset;
10629
10630 aarch64_get_mem_long_double (cpu, address, & a);
10631 aarch64_set_FP_long_double (cpu, rm, a);
10632 aarch64_get_mem_long_double (cpu, address + 16, & a);
10633 aarch64_set_FP_long_double (cpu, rn, a);
10634
10635 if (wb == Post)
10636 address += offset;
10637
10638 if (wb != NoWriteBack)
10639 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10640 }
10641
10642 static void
10643 dex_load_store_pair_fp (sim_cpu *cpu)
10644 {
10645 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
10646 instr[29,25] = instruction encoding
10647 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10648 instr[22] = load/store (1=> load)
10649 instr[21,15] = signed, scaled, offset
10650 instr[14,10] = Rn
10651 instr[ 9, 5] = Rd
10652 instr[ 4, 0] = Rm */
10653
10654 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10655 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10656
10657 switch (dispatch)
10658 {
10659 case 2: store_pair_float (cpu, offset, Post); return;
10660 case 3: load_pair_float (cpu, offset, Post); return;
10661 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
10662 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
10663 case 6: store_pair_float (cpu, offset, Pre); return;
10664 case 7: load_pair_float (cpu, offset, Pre); return;
10665
10666 case 10: store_pair_double (cpu, offset, Post); return;
10667 case 11: load_pair_double (cpu, offset, Post); return;
10668 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
10669 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
10670 case 14: store_pair_double (cpu, offset, Pre); return;
10671 case 15: load_pair_double (cpu, offset, Pre); return;
10672
10673 case 18: store_pair_long_double (cpu, offset, Post); return;
10674 case 19: load_pair_long_double (cpu, offset, Post); return;
10675 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
10676 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
10677 case 22: store_pair_long_double (cpu, offset, Pre); return;
10678 case 23: load_pair_long_double (cpu, offset, Pre); return;
10679
10680 default:
10681 HALT_UNALLOC;
10682 }
10683 }
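
/* Worked example (illustrative): LDP D0, D1, [X2, #16] has size 01
   (64 bit) and instr[24,22] == 101 (signed offset form, load), so
   dispatch == (1 << 3) | 5 == 13 and load_pair_double is called with
   the raw offset 2 (scaled by 8 to 16 bytes) and NoWriteBack.  */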
10684
10685 /* Map register V + O onto the architectural register file,
10686    wrapping around modulo the 32 vector registers.  */
10687 static inline unsigned
10688 vec_reg (unsigned v, unsigned o)
10689 {
10690 return (v + o) & 0x1F;
10691 }
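
/* E.g. vec_reg (31, 1) == 0: the register lists used by the
   multi-register load/store forms wrap around from V31 to V0.  */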
10690
10691 /* Load multiple N-element structures from ADDRESS into N consecutive
10692    vector registers.  When INTERLEAVE is non-zero the structures are
10693    de-interleaved, as the LD2/LD3/LD4 forms require: element j of
10694    register Vd+r is loaded from memory element (j * N) + r.  When it
10695    is zero each register is simply filled in turn, which is the
10696    behaviour of the multi-register forms of LD1.  */
10697 static void
10698 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, int interleave)
10699 {
10700 int all = INSTR (30, 30);
10701 unsigned size = INSTR (11, 10);
10702 unsigned vd = INSTR (4, 0);
10703 unsigned esize = 1 << size; /* Element size in bytes.  */
10704 unsigned nelts = (all ? 16 : 8) >> size; /* Elements per register.  */
10705 unsigned i;
10706 
10707 for (i = 0; i < nelts * N; i++)
10708 {
10709 unsigned reg = interleave ? (i % N) : (i / nelts);
10710 unsigned elem = interleave ? (i / N) : (i % nelts);
10711 uint64_t addr = address + i * esize;
10712 
10713 switch (size)
10714 {
10715 case 0: /* 8-bit operations. */
10716 aarch64_set_vec_u8 (cpu, vec_reg (vd, reg), elem,
10717 aarch64_get_mem_u8 (cpu, addr));
10718 break;
10719 
10720 case 1: /* 16-bit operations. */
10721 aarch64_set_vec_u16 (cpu, vec_reg (vd, reg), elem,
10722 aarch64_get_mem_u16 (cpu, addr));
10723 break;
10724 
10725 case 2: /* 32-bit operations. */
10726 aarch64_set_vec_u32 (cpu, vec_reg (vd, reg), elem,
10727 aarch64_get_mem_u32 (cpu, addr));
10728 break;
10729 
10730 case 3: /* 64-bit operations. */
10731 aarch64_set_vec_u64 (cpu, vec_reg (vd, reg), elem,
10732 aarch64_get_mem_u64 (cpu, addr));
10733 break;
10734 }
10735 }
10736 }
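
/* E.g. (illustrative) with 8 bit elements and 64 bit registers:
   LD2 {V0.8B, V1.8B}, [X0] (N == 2, interleave) gives
   V0.B[j] <- [X0 + 2*j] and V1.B[j] <- [X0 + 2*j + 1],
   while LD1 {V0.8B, V1.8B}, [X0] (no interleave) fills V0 from the
   first eight bytes and V1 from the following eight.  */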
10747
10748 /* LD4: load multiple 4-element structures to four consecutive
10749    registers, de-interleaving.  */
10750 static void
10751 LD4 (sim_cpu *cpu, uint64_t address)
10752 {
10753 vec_load (cpu, address, 4, 1);
10754 }
10755 
10756 /* LD3: load multiple 3-element structures to three consecutive
10757    registers, de-interleaving.  */
10758 static void
10759 LD3 (sim_cpu *cpu, uint64_t address)
10760 {
10761 vec_load (cpu, address, 3, 1);
10762 }
10763 
10764 /* LD2: load multiple 2-element structures to two consecutive
10765    registers, de-interleaving.  */
10766 static void
10767 LD2 (sim_cpu *cpu, uint64_t address)
10768 {
10769 vec_load (cpu, address, 2, 1);
10770 }
10768
10769 /* Load multiple 1-element structures into one register. */
10770 static void
10771 LD1_1 (sim_cpu *cpu, uint64_t address)
10772 {
10773 int all = INSTR (30, 30);
10774 unsigned size = INSTR (11, 10);
10775 unsigned vd = INSTR (4, 0);
10776 unsigned i;
10777
10778 switch (size)
10779 {
10780 case 0:
10781 /* LD1 {Vd.16b}, addr, #16 */
10782 /* LD1 {Vd.8b}, addr, #8 */
10783 for (i = 0; i < (all ? 16 : 8); i++)
10784 aarch64_set_vec_u8 (cpu, vd, i,
10785 aarch64_get_mem_u8 (cpu, address + i));
10786 return;
10787
10788 case 1:
10789 /* LD1 {Vd.8h}, addr, #16 */
10790 /* LD1 {Vd.4h}, addr, #8 */
10791 for (i = 0; i < (all ? 8 : 4); i++)
10792 aarch64_set_vec_u16 (cpu, vd, i,
10793 aarch64_get_mem_u16 (cpu, address + i * 2));
10794 return;
10795
10796 case 2:
10797 /* LD1 {Vd.4s}, addr, #16 */
10798 /* LD1 {Vd.2s}, addr, #8 */
10799 for (i = 0; i < (all ? 4 : 2); i++)
10800 aarch64_set_vec_u32 (cpu, vd, i,
10801 aarch64_get_mem_u32 (cpu, address + i * 4));
10802 return;
10803
10804 case 3:
10805 /* LD1 {Vd.2d}, addr, #16 */
10806 /* LD1 {Vd.1d}, addr, #8 */
10807 for (i = 0; i < (all ? 2 : 1); i++)
10808 aarch64_set_vec_u64 (cpu, vd, i,
10809 aarch64_get_mem_u64 (cpu, address + i * 8));
10810 return;
10811 }
10812 }
10813
10814 /* Load multiple 1-element structures into two registers. */
10815 static void
10816 LD1_2 (sim_cpu *cpu, uint64_t address)
10817 {
10818 /* N.B. unlike LD2, the multi-register forms of LD1 perform no
10819    de-interleaving: each register is filled in turn.  */
10820 vec_load (cpu, address, 2, 0);
10822 }
10823
10824 /* Load multiple 1-element structures into three registers. */
10825 static void
10826 LD1_3 (sim_cpu *cpu, uint64_t address)
10827 {
10828 /* N.B. unlike LD3, the multi-register forms of LD1 perform no
10829    de-interleaving: each register is filled in turn.  */
10830 vec_load (cpu, address, 3, 0);
10832 }
10833
10834 /* Load multiple 1-element structures into four registers. */
10835 static void
10836 LD1_4 (sim_cpu *cpu, uint64_t address)
10837 {
10838 /* N.B. unlike LD4, the multi-register forms of LD1 perform no
10839    de-interleaving: each register is filled in turn.  */
10840 vec_load (cpu, address, 4, 0);
10842 }
10843
10844 /* Store multiple N-element structures from N consecutive vector
10845    registers to ADDRESS.  When INTERLEAVE is non-zero the structures
10846    are interleaved, as the ST2/ST3/ST4 forms require: memory element
10847    (j * N) + r is taken from element j of register Vd+r.  When it is
10848    zero each register is simply written out in turn, which is the
10849    behaviour of the multi-register forms of ST1.  */
10850 static void
10851 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, int interleave)
10852 {
10853 int all = INSTR (30, 30);
10854 unsigned size = INSTR (11, 10);
10855 unsigned vd = INSTR (4, 0);
10856 unsigned esize = 1 << size; /* Element size in bytes.  */
10857 unsigned nelts = (all ? 16 : 8) >> size; /* Elements per register.  */
10858 unsigned i;
10859 
10860 for (i = 0; i < nelts * N; i++)
10861 {
10862 unsigned reg = interleave ? (i % N) : (i / nelts);
10863 unsigned elem = interleave ? (i / N) : (i % nelts);
10864 uint64_t addr = address + i * esize;
10865 
10866 switch (size)
10867 {
10868 case 0: /* 8-bit operations. */
10869 aarch64_set_mem_u8 (cpu, addr,
10870 aarch64_get_vec_u8 (cpu, vec_reg (vd, reg), elem));
10871 break;
10872 
10873 case 1: /* 16-bit operations. */
10874 aarch64_set_mem_u16 (cpu, addr,
10875 aarch64_get_vec_u16 (cpu, vec_reg (vd, reg), elem));
10876 break;
10877 
10878 case 2: /* 32-bit operations. */
10879 aarch64_set_mem_u32 (cpu, addr,
10880 aarch64_get_vec_u32 (cpu, vec_reg (vd, reg), elem));
10881 break;
10882 
10883 case 3: /* 64-bit operations. */
10884 aarch64_set_mem_u64 (cpu, addr,
10885 aarch64_get_vec_u64 (cpu, vec_reg (vd, reg), elem));
10886 break;
10887 }
10888 }
10889 }
10908
10909 /* ST4: store multiple 4-element structures from four consecutive
10910    registers, interleaving.  */
10911 static void
10912 ST4 (sim_cpu *cpu, uint64_t address)
10913 {
10914 vec_store (cpu, address, 4, 1);
10915 }
10916 
10917 /* ST3: store multiple 3-element structures from three consecutive
10918    registers, interleaving.  */
10919 static void
10920 ST3 (sim_cpu *cpu, uint64_t address)
10921 {
10922 vec_store (cpu, address, 3, 1);
10923 }
10924 
10925 /* ST2: store multiple 2-element structures from two consecutive
10926    registers, interleaving.  */
10927 static void
10928 ST2 (sim_cpu *cpu, uint64_t address)
10929 {
10930 vec_store (cpu, address, 2, 1);
10931 }
10929
10930 /* Store multiple 1-element structures into one register. */
10931 static void
10932 ST1_1 (sim_cpu *cpu, uint64_t address)
10933 {
10934 int all = INSTR (30, 30);
10935 unsigned size = INSTR (11, 10);
10936 unsigned vd = INSTR (4, 0);
10937 unsigned i;
10938
10939 switch (size)
10940 {
10941 case 0:
10942 for (i = 0; i < (all ? 16 : 8); i++)
10943 aarch64_set_mem_u8 (cpu, address + i,
10944 aarch64_get_vec_u8 (cpu, vd, i));
10945 return;
10946
10947 case 1:
10948 for (i = 0; i < (all ? 8 : 4); i++)
10949 aarch64_set_mem_u16 (cpu, address + i * 2,
10950 aarch64_get_vec_u16 (cpu, vd, i));
10951 return;
10952
10953 case 2:
10954 for (i = 0; i < (all ? 4 : 2); i++)
10955 aarch64_set_mem_u32 (cpu, address + i * 4,
10956 aarch64_get_vec_u32 (cpu, vd, i));
10957 return;
10958
10959 case 3:
10960 for (i = 0; i < (all ? 2 : 1); i++)
10961 aarch64_set_mem_u64 (cpu, address + i * 8,
10962 aarch64_get_vec_u64 (cpu, vd, i));
10963 return;
10964 }
10965 }
10966
10967 /* Store multiple 1-element structures into two registers. */
10968 static void
10969 ST1_2 (sim_cpu *cpu, uint64_t address)
10970 {
10971 /* N.B. unlike ST2, the multi-register forms of ST1 perform no
10972    interleaving: each register is written out in turn.  */
10973 vec_store (cpu, address, 2, 0);
10975 }
10976
10977 /* Store multiple 1-element structures into three registers. */
10978 static void
10979 ST1_3 (sim_cpu *cpu, uint64_t address)
10980 {
10981 /* N.B. unlike ST3, the multi-register forms of ST1 perform no
10982    interleaving: each register is written out in turn.  */
10983 vec_store (cpu, address, 3, 0);
10985 }
10986
10987 /* Store multiple 1-element structures into four registers. */
10988 static void
10989 ST1_4 (sim_cpu *cpu, uint64_t address)
10990 {
10991 /* N.B. unlike ST4, the multi-register forms of ST1 perform no
10992    interleaving: each register is written out in turn.  */
10993 vec_store (cpu, address, 4, 0);
10995 }
10996
10997 static void
10998 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
10999 {
11000 /* instr[31] = 0
11001 instr[30] = element selector 0=>half, 1=>all elements
11002 instr[29,24] = 00 1101
11003 instr[23] = 0=>simple, 1=>post
11004 instr[22] = 1
11005 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11006 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11007 11111 (immediate post inc)
11008 instr[15,14] = 11
11009 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11010 instr[12] = 0
11011 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11012 10=> word(s), 11=> double(d)
11013 instr[9,5] = address
11014 instr[4,0] = Vd */
11015
11016 unsigned full = INSTR (30, 30);
11017 unsigned vd = INSTR (4, 0);
11018 unsigned size = INSTR (11, 10);
11019 int i;
11020
11021 NYI_assert (29, 24, 0x0D);
11022 NYI_assert (22, 22, 1);
11023 NYI_assert (15, 14, 3);
11024 NYI_assert (12, 12, 0);
11025
11026 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11027 {
11028 case 0: /* LD1R. */
11029 switch (size)
11030 {
11031 case 0:
11032 {
11033 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11034 for (i = 0; i < (full ? 16 : 8); i++)
11035 aarch64_set_vec_u8 (cpu, vd, i, val);
11036 break;
11037 }
11038
11039 case 1:
11040 {
11041 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11042 for (i = 0; i < (full ? 8 : 4); i++)
11043 aarch64_set_vec_u16 (cpu, vd, i, val);
11044 break;
11045 }
11046
11047 case 2:
11048 {
11049 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11050 for (i = 0; i < (full ? 4 : 2); i++)
11051 aarch64_set_vec_u32 (cpu, vd, i, val);
11052 break;
11053 }
11054
11055 case 3:
11056 {
11057 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11058 for (i = 0; i < (full ? 2 : 1); i++)
11059 aarch64_set_vec_u64 (cpu, vd, i, val);
11060 break;
11061 }
11062
11063 default:
11064 HALT_UNALLOC;
11065 }
11066 break;
11067
11068 case 1: /* LD2R. */
11069 switch (size)
11070 {
11071 case 0:
11072 {
11073 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11074 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11075
11076 for (i = 0; i < (full ? 16 : 8); i++)
11077 {
11078 aarch64_set_vec_u8 (cpu, vd, i, val1);
11079 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11080 }
11081 break;
11082 }
11083
11084 case 1:
11085 {
11086 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11087 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11088
11089 for (i = 0; i < (full ? 8 : 4); i++)
11090 {
11091 aarch64_set_vec_u16 (cpu, vd, i, val1);
11092 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11093 }
11094 break;
11095 }
11096
11097 case 2:
11098 {
11099 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11100 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11101
11102 for (i = 0; i < (full ? 4 : 2); i++)
11103 {
11104 aarch64_set_vec_u32 (cpu, vd, i, val1);
11105 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11106 }
11107 break;
11108 }
11109
11110 case 3:
11111 {
11112 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11113 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11114
11115 for (i = 0; i < (full ? 2 : 1); i++)
11116 {
11117 aarch64_set_vec_u64 (cpu, vd, i, val1);
11118 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11119 }
11120 break;
11121 }
11122
11123 default:
11124 HALT_UNALLOC;
11125 }
11126 break;
11127
11128 case 2: /* LD3R. */
11129 switch (size)
11130 {
11131 case 0:
11132 {
11133 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11134 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11135 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11136
11137 for (i = 0; i < (full ? 16 : 8); i++)
11138 {
11139 aarch64_set_vec_u8 (cpu, vd, i, val1);
11140 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11141 aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11142 }
11143 }
11144 break;
11145
11146 case 1:
11147 {
11148 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11149 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11150 uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11151 
11152 for (i = 0; i < (full ? 8 : 4); i++)
11153 {
11154 aarch64_set_vec_u16 (cpu, vd, i, val1);
11155 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11156 aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11157 }
11158 }
11159 break;
11160
11161 case 2:
11162 {
11163 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11164 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11165 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11166
11167 for (i = 0; i < (full ? 4 : 2); i++)
11168 {
11169 aarch64_set_vec_u32 (cpu, vd, i, val1);
11170 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11171 aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11172 }
11173 }
11174 break;
11175
11176 case 3:
11177 {
11178 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11179 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11180 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11181
11182 for (i = 0; i < (full ? 2 : 1); i++)
11183 {
11184 aarch64_set_vec_u64 (cpu, vd, i, val1);
11185 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11186 aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11187 }
11188 }
11189 break;
11190
11191 default:
11192 HALT_UNALLOC;
11193 }
11194 break;
11195
11196 case 3: /* LD4R. */
11197 switch (size)
11198 {
11199 case 0:
11200 {
11201 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11202 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11203 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11204 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11205
11206 for (i = 0; i < (full ? 16 : 8); i++)
11207 {
11208 aarch64_set_vec_u8 (cpu, vd, i, val1);
11209 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11210 aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11211 aarch64_set_vec_u8 (cpu, vec_reg (vd, 3), i, val4);
11212 }
11213 }
11214 break;
11215
11216 case 1:
11217 {
11218 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11219 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11220 uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11221 uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11222 
11223 for (i = 0; i < (full ? 8 : 4); i++)
11224 {
11225 aarch64_set_vec_u16 (cpu, vd, i, val1);
11226 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11227 aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11228 aarch64_set_vec_u16 (cpu, vec_reg (vd, 3), i, val4);
11229 }
11230 }
11231 break;
11232
11233 case 2:
11234 {
11235 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11236 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11237 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11238 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11239
11240 for (i = 0; i < (full ? 4 : 2); i++)
11241 {
11242 aarch64_set_vec_u32 (cpu, vd, i, val1);
11243 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11244 aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11245 aarch64_set_vec_u32 (cpu, vec_reg (vd, 3), i, val4);
11246 }
11247 }
11248 break;
11249
11250 case 3:
11251 {
11252 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11253 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11254 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11255 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11256
11257 for (i = 0; i < (full ? 2 : 1); i++)
11258 {
11259 aarch64_set_vec_u64 (cpu, vd, i, val1);
11260 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11261 aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11262 aarch64_set_vec_u64 (cpu, vec_reg (vd, 3), i, val4);
11263 }
11264 }
11265 break;
11266
11267 default:
11268 HALT_UNALLOC;
11269 }
11270 break;
11271
11272 default:
11273 HALT_UNALLOC;
11274 }
11275 }
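
/* E.g. (illustrative) LD2R {V0.16B, V1.16B}, [X0] replicates the
   byte at [X0] into all sixteen lanes of V0 and the byte at [X0 + 1]
   into all sixteen lanes of V1.  */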
11276
11277 static void
11278 do_vec_load_store (sim_cpu *cpu)
11279 {
11280 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11281
11282 instr[31] = 0
11283 instr[30] = element selector 0=>half, 1=>all elements
11284 instr[29,25] = 00110
11285 instr[24] = ?
11286 instr[23] = 0=>simple, 1=>post
11287 instr[22] = 0=>store, 1=>load
11288 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11289 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11290 11111 (immediate post inc)
11291 instr[15,12] = elements and destinations. eg for load:
11292 0000=>LD4 => load multiple 4-element to
11293 four consecutive registers
11294 0100=>LD3 => load multiple 3-element to
11295 three consecutive registers
11296 1000=>LD2 => load multiple 2-element to
11297 two consecutive registers
11298 0010=>LD1 => load multiple 1-element to
11299 four consecutive registers
11300 0110=>LD1 => load multiple 1-element to
11301 three consecutive registers
11302 1010=>LD1 => load multiple 1-element to
11303 two consecutive registers
11304 0111=>LD1 => load multiple 1-element to
11305 one register
11306 1100=>LD1R,LD2R
11307 1110=>LD3R,LD4R
11308 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11309 10=> word(s), 11=> double(d)
11310 instr[9,5] = Vn, can be SP
11311 instr[4,0] = Vd */
11312
11313 int post;
11314 int load;
11315 unsigned vn;
11316 uint64_t address;
11317 int type;
11318
11319 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11320 HALT_NYI;
11321
11322 type = INSTR (15, 12);
11323 if (type != 0xE && type != 0xC && INSTR (21, 21) != 0)
11324 HALT_NYI;
11325
11326 post = INSTR (23, 23);
11327 load = INSTR (22, 22);
11328 vn = INSTR (9, 5);
11329 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11330
11331 if (post)
11332 {
11333 unsigned vm = INSTR (20, 16);
11334
11335 if (vm == R31)
11336 {
11337 unsigned sizeof_operation;
11338
11339 switch (type)
11340 {
11341 case 0: sizeof_operation = 32; break;
11342 case 4: sizeof_operation = 24; break;
11343 case 8: sizeof_operation = 16; break;
11344
11345 case 0xC:
11346 /* LD1R (one register) or LD2R (two registers).  */
11347 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11348 sizeof_operation <<= INSTR (11, 10);
11349 break;
11350 
11351 case 0xE:
11352 /* LD3R (three registers) or LD4R (four registers).  */
11353 sizeof_operation = INSTR (21, 21) ? 4 : 3;
11354 sizeof_operation <<= INSTR (11, 10);
11355 break;
11354
11355 case 7:
11356 /* One register, immediate offset variant. */
11357 sizeof_operation = 8;
11358 break;
11359
11360 case 10:
11361 /* Two registers, immediate offset variant. */
11362 sizeof_operation = 16;
11363 break;
11364
11365 case 6:
11366 /* Three registers, immediate offset variant. */
11367 sizeof_operation = 24;
11368 break;
11369
11370 case 2:
11371 /* Four registers, immediate offset variant. */
11372 sizeof_operation = 32;
11373 break;
11374
11375 default:
11376 HALT_UNALLOC;
11377 }
11378
11379 /* The replicate (LDnR) forms always transfer regs * esize
11380    bytes, regardless of the Q bit.  */
11381 if (INSTR (30, 30) && type != 0xC && type != 0xE)
11382 sizeof_operation *= 2;
11381
11382 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11383 }
11384 else
11385 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11386 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11387 }
11388 else
11389 {
11390 NYI_assert (20, 16, 0);
11391 }
11392
11393 if (load)
11394 {
11395 switch (type)
11396 {
11397 case 0: LD4 (cpu, address); return;
11398 case 4: LD3 (cpu, address); return;
11399 case 8: LD2 (cpu, address); return;
11400 case 2: LD1_4 (cpu, address); return;
11401 case 6: LD1_3 (cpu, address); return;
11402 case 10: LD1_2 (cpu, address); return;
11403 case 7: LD1_1 (cpu, address); return;
11404
11405 case 0xE:
11406 case 0xC: do_vec_LDnR (cpu, address); return;
11407
11408 default:
11409 HALT_NYI;
11410 }
11411 }
11412
11413 /* Stores. */
11414 switch (type)
11415 {
11416 case 0: ST4 (cpu, address); return;
11417 case 4: ST3 (cpu, address); return;
11418 case 8: ST2 (cpu, address); return;
11419 case 2: ST1_4 (cpu, address); return;
11420 case 6: ST1_3 (cpu, address); return;
11421 case 10: ST1_2 (cpu, address); return;
11422 case 7: ST1_1 (cpu, address); return;
11423 default:
11424 HALT_NYI;
11425 }
11426 }
11427
11428 static void
11429 dexLdSt (sim_cpu *cpu)
11430 {
11431 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11432 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11433 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11434 bits [29,28:26] of a LS are the secondary dispatch vector. */
11435 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11436
11437 switch (group2)
11438 {
11439 case LS_EXCL_000:
11440 dexLoadExclusive (cpu); return;
11441
11442 case LS_LIT_010:
11443 case LS_LIT_011:
11444 dexLoadLiteral (cpu); return;
11445
11446 case LS_OTHER_110:
11447 case LS_OTHER_111:
11448 dexLoadOther (cpu); return;
11449
11450 case LS_ADVSIMD_001:
11451 do_vec_load_store (cpu); return;
11452
11453 case LS_PAIR_100:
11454 dex_load_store_pair_gr (cpu); return;
11455
11456 case LS_PAIR_101:
11457 dex_load_store_pair_fp (cpu); return;
11458
11459 default:
11460 /* Should never reach here. */
11461 HALT_NYI;
11462 }
11463 }
11464
11465 /* Specific decode and execute for group Data Processing Register. */
11466
11467 static void
11468 dexLogicalShiftedRegister (sim_cpu *cpu)
11469 {
11470 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11471 instr[30,29] = op
11472 instr[28,24] = 01010
11473 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11474 instr[21] = N
11475 instr[20,16] = Rm
11476 instr[15,10] = count : must be 0xxxxx for 32 bit
11477 instr[9,5] = Rn
11478 instr[4,0] = Rd */
11479
11480 uint32_t size = INSTR (31, 31);
11481 Shift shiftType = INSTR (23, 22);
11482 uint32_t count = INSTR (15, 10);
11483
11484 /* 32 bit operations must have count[5] = 0,
11485    otherwise we have an UNALLOC.  */
11486 if (size == 0 && uimm (count, 5, 5))
11487 HALT_UNALLOC;
11488
11489 /* Dispatch on size:op:N. */
11490 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11491 {
11492 case 0: and32_shift (cpu, shiftType, count); return;
11493 case 1: bic32_shift (cpu, shiftType, count); return;
11494 case 2: orr32_shift (cpu, shiftType, count); return;
11495 case 3: orn32_shift (cpu, shiftType, count); return;
11496 case 4: eor32_shift (cpu, shiftType, count); return;
11497 case 5: eon32_shift (cpu, shiftType, count); return;
11498 case 6: ands32_shift (cpu, shiftType, count); return;
11499 case 7: bics32_shift (cpu, shiftType, count); return;
11500 case 8: and64_shift (cpu, shiftType, count); return;
11501 case 9: bic64_shift (cpu, shiftType, count); return;
11502 case 10:orr64_shift (cpu, shiftType, count); return;
11503 case 11:orn64_shift (cpu, shiftType, count); return;
11504 case 12:eor64_shift (cpu, shiftType, count); return;
11505 case 13:eon64_shift (cpu, shiftType, count); return;
11506 case 14:ands64_shift (cpu, shiftType, count); return;
11507 case 15:bics64_shift (cpu, shiftType, count); return;
11508 }
11509 }
11510
11511 /* 32 bit conditional select. */
11512 static void
11513 csel32 (sim_cpu *cpu, CondCode cc)
11514 {
11515 unsigned rm = INSTR (20, 16);
11516 unsigned rn = INSTR (9, 5);
11517 unsigned rd = INSTR (4, 0);
11518
11519 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11520 testConditionCode (cpu, cc)
11521 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11522 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11523 }
11524
11525 /* 64 bit conditional select. */
11526 static void
11527 csel64 (sim_cpu *cpu, CondCode cc)
11528 {
11529 unsigned rm = INSTR (20, 16);
11530 unsigned rn = INSTR (9, 5);
11531 unsigned rd = INSTR (4, 0);
11532
11533 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11534 testConditionCode (cpu, cc)
11535 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11536 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11537 }
11538
11539 /* 32 bit conditional increment. */
11540 static void
11541 csinc32 (sim_cpu *cpu, CondCode cc)
11542 {
11543 unsigned rm = INSTR (20, 16);
11544 unsigned rn = INSTR (9, 5);
11545 unsigned rd = INSTR (4, 0);
11546
11547 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11548 testConditionCode (cpu, cc)
11549 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11550 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
11551 }
11552
11553 /* 64 bit conditional increment. */
11554 static void
11555 csinc64 (sim_cpu *cpu, CondCode cc)
11556 {
11557 unsigned rm = INSTR (20, 16);
11558 unsigned rn = INSTR (9, 5);
11559 unsigned rd = INSTR (4, 0);
11560
11561 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11562 testConditionCode (cpu, cc)
11563 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11564 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
11565 }
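
/* N.B. the assembler aliases CINC Rd, Rn, cond (and CSET Rd, cond,
   with zero-register sources) onto CSINC with the condition
   inverted, so those aliases are handled by the functions above.  */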
11566
11567 /* 32 bit conditional invert. */
11568 static void
11569 csinv32 (sim_cpu *cpu, CondCode cc)
11570 {
11571 unsigned rm = INSTR (20, 16);
11572 unsigned rn = INSTR (9, 5);
11573 unsigned rd = INSTR (4, 0);
11574
11575 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11576 testConditionCode (cpu, cc)
11577 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11578 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
11579 }
11580
11581 /* 64 bit conditional invert. */
11582 static void
11583 csinv64 (sim_cpu *cpu, CondCode cc)
11584 {
11585 unsigned rm = INSTR (20, 16);
11586 unsigned rn = INSTR (9, 5);
11587 unsigned rd = INSTR (4, 0);
11588
11589 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11590 testConditionCode (cpu, cc)
11591 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11592 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
11593 }
11594
11595 /* 32 bit conditional negate. */
11596 static void
11597 csneg32 (sim_cpu *cpu, CondCode cc)
11598 {
11599 unsigned rm = INSTR (20, 16);
11600 unsigned rn = INSTR (9, 5);
11601 unsigned rd = INSTR (4, 0);
11602
11603 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11604 testConditionCode (cpu, cc)
11605 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11606 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
11607 }
11608
11609 /* 64 bit conditional negate. */
11610 static void
11611 csneg64 (sim_cpu *cpu, CondCode cc)
11612 {
11613 unsigned rm = INSTR (20, 16);
11614 unsigned rn = INSTR (9, 5);
11615 unsigned rd = INSTR (4, 0);
11616
11617 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11618 testConditionCode (cpu, cc)
11619 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11620 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
11621 }
11622
11623 static void
11624 dexCondSelect (sim_cpu *cpu)
11625 {
11626 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11627 instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
11628 100 ==> CSINV, 101 ==> CSNEG,
11629 _1_ ==> UNALLOC
11630 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
11631 instr[28,21] = 11010100
11632 instr[20,16] = Rm
11633 instr[15,12] = cond  */
11634
11635 CondCode cc = INSTR (15, 12);
11636 uint32_t S = INSTR (29, 29);
11637 uint32_t op2 = INSTR (11, 10);
11638
11639 if (S == 1)
11640 HALT_UNALLOC;
11641
11642 if (op2 & 0x2)
11643 HALT_UNALLOC;
11644
11645 switch ((INSTR (31, 30) << 1) | op2)
11646 {
11647 case 0: csel32 (cpu, cc); return;
11648 case 1: csinc32 (cpu, cc); return;
11649 case 2: csinv32 (cpu, cc); return;
11650 case 3: csneg32 (cpu, cc); return;
11651 case 4: csel64 (cpu, cc); return;
11652 case 5: csinc64 (cpu, cc); return;
11653 case 6: csinv64 (cpu, cc); return;
11654 case 7: csneg64 (cpu, cc); return;
11655 }
11656 }
11657
11658 /* Some helpers for counting leading 1 or 0 bits. */
11659
11660 /* Counts the number of leading bits which are the same
11661    in a 32 bit value, returning a count in the range 1 to 32.  */
11662 static uint32_t
11663 leading32 (uint32_t value)
11664 {
11665 int32_t mask = 0xffff0000;
11666 uint32_t count = 16; /* Counts number of bits set in mask. */
11667 uint32_t lo = 1; /* Lower bound for number of sign bits. */
11668 uint32_t hi = 32; /* Upper bound for number of sign bits. */
11669
11670 while (lo + 1 < hi)
11671 {
11672 int32_t test = (value & mask);
11673
11674 if (test == 0 || test == mask)
11675 {
11676 lo = count;
11677 count = (lo + hi) / 2;
11678 mask >>= (count - lo);
11679 }
11680 else
11681 {
11682 hi = count;
11683 count = (lo + hi) / 2;
11684 mask <<= hi - count;
11685 }
11686 }
11687
11688 if (lo != hi)
11689 {
11690 int32_t test;
11691
11692 mask >>= 1;
11693 test = (value & mask);
11694
11695 if (test == 0 || test == mask)
11696 count = hi;
11697 else
11698 count = lo;
11699 }
11700
11701 return count;
11702 }
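
/* Worked example (illustrative): leading32 (0x00000001) returns 31,
   since the 31 leading zeros all match the (clear) top bit, so
   clz32 reports 31 and cls32 reports 31 - 1 == 30.  */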
11703
11704 /* Counts the number of leading bits which are the same
11705    in a 64 bit value, returning a count in the range 1 to 64.  */
11706 static uint64_t
11707 leading64 (uint64_t value)
11708 {
11709 int64_t mask = 0xffffffff00000000LL;
11710 uint64_t count = 32; /* Counts number of bits set in mask. */
11711 uint64_t lo = 1; /* Lower bound for number of sign bits. */
11712 uint64_t hi = 64; /* Upper bound for number of sign bits. */
11713
11714 while (lo + 1 < hi)
11715 {
11716 int64_t test = (value & mask);
11717
11718 if (test == 0 || test == mask)
11719 {
11720 lo = count;
11721 count = (lo + hi) / 2;
11722 mask >>= (count - lo);
11723 }
11724 else
11725 {
11726 hi = count;
11727 count = (lo + hi) / 2;
11728 mask <<= hi - count;
11729 }
11730 }
11731
11732 if (lo != hi)
11733 {
11734 int64_t test;
11735
11736 mask >>= 1;
11737 test = (value & mask);
11738
11739 if (test == 0 || test == mask)
11740 count = hi;
11741 else
11742 count = lo;
11743 }
11744
11745 return count;
11746 }
11747
11748 /* Bit operations. */
11749 /* N.B register args may not be SP. */
11750
11751 /* 32 bit count leading sign bits. */
11752 static void
11753 cls32 (sim_cpu *cpu)
11754 {
11755 unsigned rn = INSTR (9, 5);
11756 unsigned rd = INSTR (4, 0);
11757
11758 /* N.B. the result needs to exclude the leading bit. */
11759 aarch64_set_reg_u64
11760 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
11761 }
11762
11763 /* 64 bit count leading sign bits. */
11764 static void
11765 cls64 (sim_cpu *cpu)
11766 {
11767 unsigned rn = INSTR (9, 5);
11768 unsigned rd = INSTR (4, 0);
11769
11770 /* N.B. the result needs to exclude the leading bit. */
11771 aarch64_set_reg_u64
11772 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
11773 }
11774
11775 /* 32 bit count leading zero bits. */
11776 static void
11777 clz32 (sim_cpu *cpu)
11778 {
11779 unsigned rn = INSTR (9, 5);
11780 unsigned rd = INSTR (4, 0);
11781 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11782
11783 /* if the sign (top) bit is set then the count is 0. */
11784 if (pick32 (value, 31, 31))
11785 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11786 else
11787 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
11788 }
11789
11790 /* 64 bit count leading zero bits. */
11791 static void
11792 clz64 (sim_cpu *cpu)
11793 {
11794 unsigned rn = INSTR (9, 5);
11795 unsigned rd = INSTR (4, 0);
11796 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11797
11798 /* if the sign (top) bit is set then the count is 0. */
11799 if (pick64 (value, 63, 63))
11800 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
11801 else
11802 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
11803 }
11804
11805 /* 32 bit reverse bits. */
11806 static void
11807 rbit32 (sim_cpu *cpu)
11808 {
11809 unsigned rn = INSTR (9, 5);
11810 unsigned rd = INSTR (4, 0);
11811 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11812 uint32_t result = 0;
11813 int i;
11814
11815 for (i = 0; i < 32; i++)
11816 {
11817 result <<= 1;
11818 result |= (value & 1);
11819 value >>= 1;
11820 }
11821 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11822 }
11823
11824 /* 64 bit reverse bits. */
11825 static void
11826 rbit64 (sim_cpu *cpu)
11827 {
11828 unsigned rn = INSTR (9, 5);
11829 unsigned rd = INSTR (4, 0);
11830 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11831 uint64_t result = 0;
11832 int i;
11833
11834 for (i = 0; i < 64; i++)
11835 {
11836 result <<= 1;
11837 result |= (value & 1UL);
11838 value >>= 1;
11839 }
11840 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11841 }
11842
11843 /* 32 bit reverse bytes. */
11844 static void
11845 rev32 (sim_cpu *cpu)
11846 {
11847 unsigned rn = INSTR (9, 5);
11848 unsigned rd = INSTR (4, 0);
11849 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11850 uint32_t result = 0;
11851 int i;
11852
11853 for (i = 0; i < 4; i++)
11854 {
11855 result <<= 8;
11856 result |= (value & 0xff);
11857 value >>= 8;
11858 }
11859 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11860 }
11861
11862 /* 64 bit reverse bytes. */
11863 static void
11864 rev64 (sim_cpu *cpu)
11865 {
11866 unsigned rn = INSTR (9, 5);
11867 unsigned rd = INSTR (4, 0);
11868 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11869 uint64_t result = 0;
11870 int i;
11871
11872 for (i = 0; i < 8; i++)
11873 {
11874 result <<= 8;
11875 result |= (value & 0xffULL);
11876 value >>= 8;
11877 }
11878 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11879 }
11880
11881 /* 32 bit reverse shorts. */
11882 /* N.B. this reverses the order of the bytes in each half word. */
11883 static void
11884 revh32 (sim_cpu *cpu)
11885 {
11886 unsigned rn = INSTR (9, 5);
11887 unsigned rd = INSTR (4, 0);
11888 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
11889 uint32_t result = 0;
11890 int i;
11891
11892 for (i = 0; i < 2; i++)
11893 {
11894 result <<= 8;
11895 result |= (value & 0x00ff00ff);
11896 value >>= 8;
11897 }
11898 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11899 }
11900
11901 /* 64 bit reverse shorts. */
11902 /* N.B. this reverses the order of the bytes in each half word. */
11903 static void
11904 revh64 (sim_cpu *cpu)
11905 {
11906 unsigned rn = INSTR (9, 5);
11907 unsigned rd = INSTR (4, 0);
11908 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
11909 uint64_t result = 0;
11910 int i;
11911
11912 for (i = 0; i < 2; i++)
11913 {
11914 result <<= 8;
11915 result |= (value & 0x00ff00ff00ff00ffULL);
11916 value >>= 8;
11917 }
11918 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
11919 }
11920
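/* 64 bit reverse bytes within each 32 bit word.  This is the 64 bit
   form of REV32: the byte order is reversed inside each of the two
   32 bit words of the register, but the words themselves stay put.  */
static void
rev32_64 (sim_cpu *cpu)
{
unsigned rn = INSTR (9, 5);
unsigned rd = INSTR (4, 0);
uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
uint64_t result = 0;
int i;
int j;

for (i = 0; i < 2; i++)
{
uint32_t word = (uint32_t) (value >> (i * 32));
uint32_t rev = 0;

for (j = 0; j < 4; j++)
{
rev <<= 8;
rev |= (word & 0xff);
word >>= 8;
}
result |= ((uint64_t) rev) << (i * 32);
}
aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
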
11921 static void
11922 dexDataProc1Source (sim_cpu *cpu)
11923 {
11924 /* instr[30] = 1
11925 instr[28,21] = 11010110
11926 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11927 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
11928 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
11929 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
11930 000010 ==> REV (32 bit) / REV32 (64 bit),
11931 000011 ==> REV (64 bit only), ow ==> UNALLOC
11932 000100 ==> CLZ, 000101 ==> CLS
11933 instr[9,5] = rn : may not be SP
11934 instr[4,0] = rd : may not be SP. */
11935
11936 uint32_t S = INSTR (29, 29);
11937 uint32_t opcode2 = INSTR (20, 16);
11938 uint32_t opcode = INSTR (15, 10);
11939 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
11940
11941 if (S == 1)
11942 HALT_UNALLOC;
11943
11944 if (opcode2 != 0)
11945 HALT_UNALLOC;
11946
11947 if (opcode & 0x38)
11948 HALT_UNALLOC;
11949
11950 switch (dispatch)
11951 {
11952 case 0: rbit32 (cpu); return;
11953 case 1: revh32 (cpu); return;
11954 case 2: rev32 (cpu); return;
11955 case 4: clz32 (cpu); return;
11956 case 5: cls32 (cpu); return;
11957 case 8: rbit64 (cpu); return;
11958 case 9: revh64 (cpu); return;
11959 case 10:rev32_64 (cpu); return;
11960 case 11:rev64 (cpu); return;
11961 case 12:clz64 (cpu); return;
11962 case 13:cls64 (cpu); return;
11963 default: HALT_UNALLOC;
11964 }
11965 }
11966
11967 /* Variable shift.
11968 Shifts by count supplied in register.
11969 N.B register args may not be SP.
11970 These all use the shifted auxiliary function for
11971 simplicity and clarity. Writing the actual shift
11972 inline would avoid a branch and so be faster but
11973 would also necessitate getting signs right. */
11974
11975 /* 32 bit arithmetic shift right. */
11976 static void
11977 asrv32 (sim_cpu *cpu)
11978 {
11979 unsigned rm = INSTR (20, 16);
11980 unsigned rn = INSTR (9, 5);
11981 unsigned rd = INSTR (4, 0);
11982
11983 aarch64_set_reg_u64
11984 (cpu, rd, NO_SP,
11985 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
11986 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
11987 }
11988
11989 /* 64 bit arithmetic shift right. */
11990 static void
11991 asrv64 (sim_cpu *cpu)
11992 {
11993 unsigned rm = INSTR (20, 16);
11994 unsigned rn = INSTR (9, 5);
11995 unsigned rd = INSTR (4, 0);
11996
11997 aarch64_set_reg_u64
11998 (cpu, rd, NO_SP,
11999 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12000 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12001 }
12002
12003 /* 32 bit logical shift left. */
12004 static void
12005 lslv32 (sim_cpu *cpu)
12006 {
12007 unsigned rm = INSTR (20, 16);
12008 unsigned rn = INSTR (9, 5);
12009 unsigned rd = INSTR (4, 0);
12010
12011 aarch64_set_reg_u64
12012 (cpu, rd, NO_SP,
12013 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12014 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12015 }
12016
12017 /* 64 bit logical shift left. */
12018 static void
12019 lslv64 (sim_cpu *cpu)
12020 {
12021 unsigned rm = INSTR (20, 16);
12022 unsigned rn = INSTR (9, 5);
12023 unsigned rd = INSTR (4, 0);
12024
12025 aarch64_set_reg_u64
12026 (cpu, rd, NO_SP,
12027 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12028 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12029 }
12030
12031 /* 32 bit logical shift right. */
12032 static void
12033 lsrv32 (sim_cpu *cpu)
12034 {
12035 unsigned rm = INSTR (20, 16);
12036 unsigned rn = INSTR (9, 5);
12037 unsigned rd = INSTR (4, 0);
12038
12039 aarch64_set_reg_u64
12040 (cpu, rd, NO_SP,
12041 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12042 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12043 }
12044
12045 /* 64 bit logical shift right. */
12046 static void
12047 lsrv64 (sim_cpu *cpu)
12048 {
12049 unsigned rm = INSTR (20, 16);
12050 unsigned rn = INSTR (9, 5);
12051 unsigned rd = INSTR (4, 0);
12052
12053 aarch64_set_reg_u64
12054 (cpu, rd, NO_SP,
12055 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12056 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12057 }
12058
12059 /* 32 bit rotate right. */
12060 static void
12061 rorv32 (sim_cpu *cpu)
12062 {
12063 unsigned rm = INSTR (20, 16);
12064 unsigned rn = INSTR (9, 5);
12065 unsigned rd = INSTR (4, 0);
12066
12067 aarch64_set_reg_u64
12068 (cpu, rd, NO_SP,
12069 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12070 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12071 }
12072
12073 /* 64 bit rotate right. */
12074 static void
12075 rorv64 (sim_cpu *cpu)
12076 {
12077 unsigned rm = INSTR (20, 16);
12078 unsigned rn = INSTR (9, 5);
12079 unsigned rd = INSTR (4, 0);
12080
12081 aarch64_set_reg_u64
12082 (cpu, rd, NO_SP,
12083 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12084 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12085 }
12086
12087
12088 /* divide. */
12089
12090 /* 32 bit signed divide. */
12091 static void
12092 sdiv32 (sim_cpu *cpu)
12093 {
12094 unsigned rm = INSTR (20, 16);
12095 unsigned rn = INSTR (9, 5);
12096 unsigned rd = INSTR (4, 0);
12097 /* N.B. the pseudo-code does the divide using 64 bit data. */
12098 /* N.B. C integer division rounds towards zero, as required.  */
12099 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12100 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12101
12102 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12103 divisor ? ((int32_t) (dividend / divisor)) : 0);
12104 }
12105
12106 /* 64 bit signed divide. */
12107 static void
12108 sdiv64 (sim_cpu *cpu)
12109 {
12110 unsigned rm = INSTR (20, 16);
12111 unsigned rn = INSTR (9, 5);
12112 unsigned rd = INSTR (4, 0);
12113 int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
12114 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12115 
12116 /* N.B. C integer division rounds towards zero, as required.  The
12117    architecture defines the overflowing INT64_MIN / -1 case to
12118    return INT64_MIN; guard it so the host divide cannot trap.  */
12119 if (divisor == -1 && dividend == (int64_t) (1ULL << 63))
12120 aarch64_set_reg_s64 (cpu, rd, NO_SP, dividend);
12121 else
12122 aarch64_set_reg_s64 (cpu, rd, NO_SP, divisor ? dividend / divisor : 0);
12123 }
12121
12122 /* 32 bit unsigned divide. */
12123 static void
12124 udiv32 (sim_cpu *cpu)
12125 {
12126 unsigned rm = INSTR (20, 16);
12127 unsigned rn = INSTR (9, 5);
12128 unsigned rd = INSTR (4, 0);
12129
12130 /* N.B. the pseudo-code does the divide using 64 bit data. */
12131 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12132 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12133
12134 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12135 divisor ? (uint32_t) (dividend / divisor) : 0);
12136 }
12137
12138 /* 64 bit unsigned divide. */
12139 static void
12140 udiv64 (sim_cpu *cpu)
12141 {
12142 unsigned rm = INSTR (20, 16);
12143 unsigned rn = INSTR (9, 5);
12144 unsigned rd = INSTR (4, 0);
12145
12146 /* N.B. C integer division rounds towards zero, as required.  */
12147 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12148
12149 aarch64_set_reg_u64
12150 (cpu, rd, NO_SP,
12151 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12152 }
12153
12154 static void
12155 dexDataProc2Source (sim_cpu *cpu)
12156 {
12157 /* assert instr[30] == 0
12158 instr[28,21] == 11010110
12159 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12160 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12161 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12162 001000 ==> LSLV, 001001 ==> LSRV
12163 001010 ==> ASRV, 001011 ==> RORV
12164 ow ==> UNALLOC. */
12165
12166 uint32_t dispatch;
12167 uint32_t S = INSTR (29, 29);
12168 uint32_t opcode = INSTR (15, 10);
12169
12170 if (S == 1)
12171 HALT_UNALLOC;
12172
12173 if (opcode & 0x34)
12174 HALT_UNALLOC;
12175
12176 dispatch = ( (INSTR (31, 31) << 3)
12177 | (uimm (opcode, 3, 3) << 2)
12178 | uimm (opcode, 1, 0));
12179 switch (dispatch)
12180 {
12181 case 2: udiv32 (cpu); return;
12182 case 3: sdiv32 (cpu); return;
12183 case 4: lslv32 (cpu); return;
12184 case 5: lsrv32 (cpu); return;
12185 case 6: asrv32 (cpu); return;
12186 case 7: rorv32 (cpu); return;
12187 case 10: udiv64 (cpu); return;
12188 case 11: sdiv64 (cpu); return;
12189 case 12: lslv64 (cpu); return;
12190 case 13: lsrv64 (cpu); return;
12191 case 14: asrv64 (cpu); return;
12192 case 15: rorv64 (cpu); return;
12193 default: HALT_UNALLOC;
12194 }
12195 }
12196
12197
12198 /* Multiply. */
12199
12200 /* 32 bit multiply and add. */
12201 static void
12202 madd32 (sim_cpu *cpu)
12203 {
12204 unsigned rm = INSTR (20, 16);
12205 unsigned ra = INSTR (14, 10);
12206 unsigned rn = INSTR (9, 5);
12207 unsigned rd = INSTR (4, 0);
12208
12209 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12210 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12211 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12212 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12213 }
12214
12215 /* 64 bit multiply and add. */
12216 static void
12217 madd64 (sim_cpu *cpu)
12218 {
12219 unsigned rm = INSTR (20, 16);
12220 unsigned ra = INSTR (14, 10);
12221 unsigned rn = INSTR (9, 5);
12222 unsigned rd = INSTR (4, 0);
12223
12224 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12225 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12226 + aarch64_get_reg_u64 (cpu, rn, NO_SP)
12227 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12228 }
12229
12230 /* 32 bit multiply and sub. */
12231 static void
12232 msub32 (sim_cpu *cpu)
12233 {
12234 unsigned rm = INSTR (20, 16);
12235 unsigned ra = INSTR (14, 10);
12236 unsigned rn = INSTR (9, 5);
12237 unsigned rd = INSTR (4, 0);
12238
12239 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12240 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12241 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12242 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12243 }
12244
12245 /* 64 bit multiply and sub. */
12246 static void
12247 msub64 (sim_cpu *cpu)
12248 {
12249 unsigned rm = INSTR (20, 16);
12250 unsigned ra = INSTR (14, 10);
12251 unsigned rn = INSTR (9, 5);
12252 unsigned rd = INSTR (4, 0);
12253
12254 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12255 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12256 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12257 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12258 }
12259
12260 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12261 static void
12262 smaddl (sim_cpu *cpu)
12263 {
12264 unsigned rm = INSTR (20, 16);
12265 unsigned ra = INSTR (14, 10);
12266 unsigned rn = INSTR (9, 5);
12267 unsigned rd = INSTR (4, 0);
12268
12269 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12270 obtain a 64 bit product. */
12271 aarch64_set_reg_s64
12272 (cpu, rd, NO_SP,
12273 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12274 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12275 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12276 }
12277
12278 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12279 static void
12280 smsubl (sim_cpu *cpu)
12281 {
12282 unsigned rm = INSTR (20, 16);
12283 unsigned ra = INSTR (14, 10);
12284 unsigned rn = INSTR (9, 5);
12285 unsigned rd = INSTR (4, 0);
12286
12287 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12288 obtain a 64 bit product. */
12289 aarch64_set_reg_s64
12290 (cpu, rd, NO_SP,
12291 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12292 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12293 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12294 }
12295
12296 /* Integer Multiply/Divide. */
12297
12298 /* First some macros and a helper function. */
12299 /* Macros to test or access elements of 64 bit words. */
12300
12301 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12302 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12303 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12304 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12305 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12306 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12307
12308 /* Offset of sign bit in 64 bit signed integer. */
12309 #define SIGN_SHIFT_U64 63
12310 /* The sign bit itself -- also identifies the minimum negative int value. */
12311 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
12312 /* Return true if a 64 bit signed int presented as an unsigned int is the
12313 most negative value. */
12314 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12315 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12316    int has its sign bit set.  */
12317 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12318 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12319 an unsigned int has its sign bit set or not. */
12320 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12321 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12322 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
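
/* E.g. (illustrative) signOfU64 (0x8000000000000000ULL) == -1L and
   signOfU64 (0x1ULL) == 1L.  */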
12323
12324 /* Multiply two 64 bit ints and return
12325    the hi 64 bits of the 128 bit product.  */
12326
12327 static uint64_t
12328 mul64hi (uint64_t value1, uint64_t value2)
12329 {
12330 uint64_t resultmid1;
12331 uint64_t result;
12332 uint64_t value1_lo = lowWordToU64 (value1);
12333 uint64_t value1_hi = highWordToU64 (value1);
12334 uint64_t value2_lo = lowWordToU64 (value2);
12335 uint64_t value2_hi = highWordToU64 (value2);
12336
12337 /* Cross-multiply and collect results. */
12338
12339 uint64_t xproductlo = value1_lo * value2_lo;
12340 uint64_t xproductmid1 = value1_lo * value2_hi;
12341 uint64_t xproductmid2 = value1_hi * value2_lo;
12342 uint64_t xproducthi = value1_hi * value2_hi;
12343 uint64_t carry = 0;
12344 /* Start accumulating 64 bit results. */
12345 /* Drop bottom half of lowest cross-product. */
12346 uint64_t resultmid = xproductlo >> 32;
12347 /* Add in middle products. */
12348 resultmid = resultmid + xproductmid1;
12349
12350 /* Check for overflow. */
12351 if (resultmid < xproductmid1)
12352 /* Carry over 1 into top cross-product. */
12353 carry++;
12354
12355 resultmid1 = resultmid + xproductmid2;
12356
12357 /* Check for overflow. */
12358 if (resultmid1 < xproductmid2)
12359 /* Carry over 1 into top cross-product. */
12360 carry++;
12361
12362 /* Drop lowest 32 bits of middle cross-product. */
12363 result = resultmid1 >> 32;
12364 
12365 /* Move the carries up above the middle cross-product's highest
12366    bit, then add in the top cross-product. */
12367 result += (carry << 32) + xproducthi;
12367
12368 return result;
12369 }
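
/* Worked example (illustrative): squaring the largest unsigned 64 bit
   value gives (2^64 - 1) * (2^64 - 1) == 2^128 - 2^65 + 1, whose high
   half is 2^64 - 2, so mul64hi (~0ULL, ~0ULL) must return
   0xfffffffffffffffe.  */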
12370
12371 /* Signed multiply high, source, source2 :
12372 64 bit, dest <-- high 64-bit of result. */
12373 static void
12374 smulh (sim_cpu *cpu)
12375 {
12376 uint64_t uresult;
12377 int64_t result;
12378 unsigned rm = INSTR (20, 16);
12379 unsigned rn = INSTR (9, 5);
12380 unsigned rd = INSTR (4, 0);
12381 GReg ra = INSTR (14, 10);
12382 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12383 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12384 uint64_t uvalue1;
12385 uint64_t uvalue2;
12386 int64_t signum = 1;
12387
12388 if (ra != R31)
12389 HALT_UNALLOC;
12390
12391 /* Convert to unsigned and use the unsigned mul64hi routine,
12392    then fix the sign up afterwards.  */
12393 if (value1 < 0)
12394 {
12395 signum *= -1L;
12396 uvalue1 = -value1;
12397 }
12398 else
12399 {
12400 uvalue1 = value1;
12401 }
12402
12403 if (value2 < 0)
12404 {
12405 signum *= -1L;
12406 uvalue2 = -value2;
12407 }
12408 else
12409 {
12410 uvalue2 = value2;
12411 }
12412
12413 uresult = mul64hi (uvalue1, uvalue2);
12414 result = uresult;
12415 
12416 if (signum < 0)
12417 /* Negate the 128 bit product: invert the high half and add one
12418    only if the (wrapped) low half of the product is zero.  */
12419 result = ~uresult + (uvalue1 * uvalue2 == 0);
12416
12417 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12418 }
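
/* Worked example (illustrative): SMULH with Xn == -1 and Xm == 1
   yields the 128 bit product -1 (all ones), so the high half written
   to Xd must be 0xffffffffffffffff: the magnitudes multiply to give
   high 0 and low 1, and since the low half is non-zero the negation
   simply inverts the high half, producing ~0 as required.  */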
12419
12420 /* Unsigned multiply add long -- source, source2 :
12421 32 bit, source3 : 64 bit. */
12422 static void
12423 umaddl (sim_cpu *cpu)
12424 {
12425 unsigned rm = INSTR (20, 16);
12426 unsigned ra = INSTR (14, 10);
12427 unsigned rn = INSTR (9, 5);
12428 unsigned rd = INSTR (4, 0);
12429
12430 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12431    obtain a 64 bit product.  */
12432 aarch64_set_reg_u64
12433 (cpu, rd, NO_SP,
12434 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12435 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12436 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12437 }
12438
12439 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12440 static void
12441 umsubl (sim_cpu *cpu)
12442 {
12443 unsigned rm = INSTR (20, 16);
12444 unsigned ra = INSTR (14, 10);
12445 unsigned rn = INSTR (9, 5);
12446 unsigned rd = INSTR (4, 0);
12447
12448 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12449    obtain a 64 bit product.  */
12450 aarch64_set_reg_u64
12451 (cpu, rd, NO_SP,
12452 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12453 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12454 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12455 }
12456
12457 /* Unsigned multiply high, source, source2 :
12458 64 bit, dest <-- high 64-bit of result. */
12459 static void
12460 umulh (sim_cpu *cpu)
12461 {
12462 unsigned rm = INSTR (20, 16);
12463 unsigned rn = INSTR (9, 5);
12464 unsigned rd = INSTR (4, 0);
12465 GReg ra = INSTR (14, 10);
12466
12467 if (ra != R31)
12468 HALT_UNALLOC;
12469
12470 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12471 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12472 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12473 }
12474
12475 static void
12476 dexDataProc3Source (sim_cpu *cpu)
12477 {
12478 /* assert instr[28,24] == 11011. */
12479 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12480 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12481 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
12482 instr[15] = o0 : 0/1 ==> ok
12483 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
12484 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12485 0100 ==> SMULH, (64 bit only)
12486 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
12487 1100 ==> UMULH (64 bit only)
12488 ow ==> UNALLOC. */
12489
12490 uint32_t dispatch;
12491 uint32_t size = INSTR (31, 31);
12492 uint32_t op54 = INSTR (30, 29);
12493 uint32_t op31 = INSTR (23, 21);
12494 uint32_t o0 = INSTR (15, 15);
12495
12496 if (op54 != 0)
12497 HALT_UNALLOC;
12498
12499 if (size == 0)
12500 {
12501 if (op31 != 0)
12502 HALT_UNALLOC;
12503
12504 if (o0 == 0)
12505 madd32 (cpu);
12506 else
12507 msub32 (cpu);
12508 return;
12509 }
12510
12511 dispatch = (op31 << 1) | o0;
12512
12513 switch (dispatch)
12514 {
12515 case 0: madd64 (cpu); return;
12516 case 1: msub64 (cpu); return;
12517 case 2: smaddl (cpu); return;
12518 case 3: smsubl (cpu); return;
12519 case 4: smulh (cpu); return;
12520 case 10: umaddl (cpu); return;
12521 case 11: umsubl (cpu); return;
12522 case 12: umulh (cpu); return;
12523 default: HALT_UNALLOC;
12524 }
12525 }
12526
12527 static void
12528 dexDPReg (sim_cpu *cpu)
12529 {
12530 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12531 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12532 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
12533 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12534
12535 switch (group2)
12536 {
12537 case DPREG_LOG_000:
12538 case DPREG_LOG_001:
12539 dexLogicalShiftedRegister (cpu); return;
12540
12541 case DPREG_ADDSHF_010:
12542 dexAddSubtractShiftedRegister (cpu); return;
12543
12544 case DPREG_ADDEXT_011:
12545 dexAddSubtractExtendedRegister (cpu); return;
12546
12547 case DPREG_ADDCOND_100:
12548 {
12549 /* This set bundles a variety of different operations. */
12550 /* Check for: */
12551 /* 1) add/sub w carry. */
12552 uint32_t mask1 = 0x1FE00000U;
12553 uint32_t val1 = 0x1A000000U;
12554 /* 2) cond compare register/immediate. */
12555 uint32_t mask2 = 0x1FE00000U;
12556 uint32_t val2 = 0x1A400000U;
12557 /* 3) cond select. */
12558 uint32_t mask3 = 0x1FE00000U;
12559 uint32_t val3 = 0x1A800000U;
12560 /* 4) data proc 1/2 source. */
12561 uint32_t mask4 = 0x1FE00000U;
12562 uint32_t val4 = 0x1AC00000U;
12563
12564 if ((aarch64_get_instr (cpu) & mask1) == val1)
12565 dexAddSubtractWithCarry (cpu);
12566
12567 else if ((aarch64_get_instr (cpu) & mask2) == val2)
12568 CondCompare (cpu);
12569
12570 else if ((aarch64_get_instr (cpu) & mask3) == val3)
12571 dexCondSelect (cpu);
12572
12573 else if ((aarch64_get_instr (cpu) & mask4) == val4)
12574 {
12575 /* Bit 30 is clear for data proc 2 source
12576 and set for data proc 1 source. */
12577 if (aarch64_get_instr (cpu) & (1U << 30))
12578 dexDataProc1Source (cpu);
12579 else
12580 dexDataProc2Source (cpu);
12581 }
12582
12583 else
12584 /* Should not reach here. */
12585 HALT_NYI;
12586
12587 return;
12588 }
12589
12590 case DPREG_3SRC_110:
12591 dexDataProc3Source (cpu); return;
12592
12593 case DPREG_UNALLOC_101:
12594 HALT_UNALLOC;
12595
12596 case DPREG_3SRC_111:
12597 dexDataProc3Source (cpu); return;
12598
12599 default:
12600 /* Should never reach here. */
12601 HALT_NYI;
12602 }
12603 }
12604
12605 /* Unconditional Branch immediate.
12606 Offset is a PC-relative byte offset in the range +/- 128MiB.
12607 The decode routine is expected to have scaled the instruction's
12608 word offset up to the byte offset that is passed in here. */
12609
12610 /* Unconditional branch. */
12611 static void
12612 buc (sim_cpu *cpu, int32_t offset)
12613 {
12614 aarch64_set_next_PC_by_offset (cpu, offset);
12615 }
12616
12617 static unsigned stack_depth = 0;
12618
12619 /* Unconditional branch and link -- writes return PC to LR. */
12620 static void
12621 bl (sim_cpu *cpu, int32_t offset)
12622 {
12623 aarch64_save_LR (cpu);
12624 aarch64_set_next_PC_by_offset (cpu, offset);
12625
12626 if (TRACE_BRANCH_P (cpu))
12627 {
12628 ++ stack_depth;
12629 TRACE_BRANCH (cpu,
12630 " %*scall %" PRIx64 " [%s]"
12631 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12632 stack_depth, " ", aarch64_get_next_PC (cpu),
12633 aarch64_get_func (aarch64_get_next_PC (cpu)),
12634 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12635 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12636 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12637 );
12638 }
12639 }
12640
12641 /* Unconditional Branch register.
12642 Branch/return address is in source register. */
12643
12644 /* Unconditional branch. */
12645 static void
12646 br (sim_cpu *cpu)
12647 {
12648 unsigned rn = INSTR (9, 5);
12649 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12650 }
12651
12652 /* Unconditional branch and link -- writes return PC to LR. */
12653 static void
12654 blr (sim_cpu *cpu)
12655 {
12656 unsigned rn = INSTR (9, 5);
12657
12658 /* The pseudo code in the spec says we update LR before fetching
12659 the value from rn. */
12660 aarch64_save_LR (cpu);
12661 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12662
12663 if (TRACE_BRANCH_P (cpu))
12664 {
12665 ++ stack_depth;
12666 TRACE_BRANCH (cpu,
12667 " %*scall %" PRIx64 " [%s]"
12668 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
12669 stack_depth, " ", aarch64_get_next_PC (cpu),
12670 aarch64_get_func (aarch64_get_next_PC (cpu)),
12671 aarch64_get_reg_u64 (cpu, 0, NO_SP),
12672 aarch64_get_reg_u64 (cpu, 1, NO_SP),
12673 aarch64_get_reg_u64 (cpu, 2, NO_SP)
12674 );
12675 }
12676 }
12677
12678 /* Return -- the assembler will default the source to LR. This is
12679 functionally equivalent to br but, presumably, unlike br it
12680 side-effects the branch predictor. */
12681 static void
12682 ret (sim_cpu *cpu)
12683 {
12684 unsigned rn = INSTR (9, 5);
12685 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
12686
12687 if (TRACE_BRANCH_P (cpu))
12688 {
12689 TRACE_BRANCH (cpu,
12690 " %*sreturn [result: %" PRIx64 "]",
12691 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
12692 -- stack_depth;
12693 }
12694 }
12695
12696 /* NOP -- we implement this and call it from the decode in case we
12697 want to intercept it later. */
12698
12699 static void
12700 nop (sim_cpu *cpu)
12701 {
12702 }
12703
12704 /* Data synchronization barrier. */
12705
12706 static void
12707 dsb (sim_cpu *cpu)
12708 {
12709 }
12710
12711 /* Data memory barrier. */
12712
12713 static void
12714 dmb (sim_cpu *cpu)
12715 {
12716 }
12717
12718 /* Instruction synchronization barrier. */
12719
12720 static void
12721 isb (sim_cpu *cpu)
12722 {
12723 }
12724
12725 static void
12726 dexBranchImmediate (sim_cpu *cpu)
12727 {
12728 /* assert instr[30,26] == 00101
12729 instr[31] ==> 0 == B, 1 == BL
12730 instr[25,0] == imm26 branch offset counted in words. */
12731
12732 uint32_t top = INSTR (31, 31);
12733 /* We have a 26 bit signed word offset which we need to pass to the
12734 execute routine as a signed byte offset. */
12735 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
12736
12737 if (top)
12738 bl (cpu, offset);
12739 else
12740 buc (cpu, offset);
12741 }
12742
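/* For illustration (branch26_byte_offset is a hypothetical helper, not
   used by the decode): this is the arithmetic that
   simm32 (aarch64_get_instr (cpu), 25, 0) << 2 performs above --
   extract the 26 bit word offset, sign-extend it and scale to bytes.  */

static inline int32_t
branch26_byte_offset (uint32_t instr)
{
  int32_t imm26 = instr & 0x3ffffff;   /* instr[25,0].  */

  if (imm26 & 0x2000000)               /* Bit 25 is the sign bit.  */
    imm26 |= ~ 0x3ffffff;              /* Sign-extend to 32 bits.  */

  return imm26 * 4;                    /* Scale words to bytes.  */
}
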
12743 /* Control Flow. */
12744
12745 /* Conditional branch
12746
12747 Offset is a PC-relative byte offset in the range +/- 1MiB. Pos is
12748 a bit position in the range 0 .. 63.
12749
12750 Cc is a CondCode enum value as pulled out of the decode.
12751
12752 N.B. any offset register (source) can only be Xn or Wn. */
12753
12754 static void
12755 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
12756 {
12757 /* the test returns TRUE if CC is met. */
12758 if (testConditionCode (cpu, cc))
12759 aarch64_set_next_PC_by_offset (cpu, offset);
12760 }
12761
12762 /* 32 bit branch on register non-zero. */
12763 static void
12764 cbnz32 (sim_cpu *cpu, int32_t offset)
12765 {
12766 unsigned rt = INSTR (4, 0);
12767
12768 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
12769 aarch64_set_next_PC_by_offset (cpu, offset);
12770 }
12771
12772 /* 64 bit branch on register non-zero. */
12773 static void
12774 cbnz (sim_cpu *cpu, int32_t offset)
12775 {
12776 unsigned rt = INSTR (4, 0);
12777
12778 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
12779 aarch64_set_next_PC_by_offset (cpu, offset);
12780 }
12781
12782 /* 32 bit branch on register zero. */
12783 static void
12784 cbz32 (sim_cpu *cpu, int32_t offset)
12785 {
12786 unsigned rt = INSTR (4, 0);
12787
12788 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
12789 aarch64_set_next_PC_by_offset (cpu, offset);
12790 }
12791
12792 /* 64 bit branch on register zero. */
12793 static void
12794 cbz (sim_cpu *cpu, int32_t offset)
12795 {
12796 unsigned rt = INSTR (4, 0);
12797
12798 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
12799 aarch64_set_next_PC_by_offset (cpu, offset);
12800 }
12801
12802 /* Branch on register bit test non-zero -- one size fits all. */
12803 static void
12804 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12805 {
12806 unsigned rt = INSTR (4, 0);
12807
12808 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
12809 aarch64_set_next_PC_by_offset (cpu, offset);
12810 }
12811
12812 /* Branch on register bit test zero -- one size fits all. */
12813 static void
12814 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
12815 {
12816 unsigned rt = INSTR (4, 0);
12817
12818 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
12819 aarch64_set_next_PC_by_offset (cpu, offset);
12820 }
12821
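/* A note on the masks used by tbnz and tbz above (test_bit_mask is a
   hypothetical helper shown for illustration): the test bit index can
   be as large as 63, so the mask must be built in a 64 bit type -- a
   plain "1 << pos" would be a 32 bit shift, which is undefined for
   pos >= 32.  */

static inline uint64_t
test_bit_mask (uint32_t pos)
{
  return ((uint64_t) 1) << pos;
}
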
12822 static void
12823 dexCompareBranchImmediate (sim_cpu *cpu)
12824 {
12825 /* instr[30,25] = 01 1010
12826 instr[31] = size : 0 ==> 32, 1 ==> 64
12827 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
12828 instr[23,5] = simm19 branch offset counted in words
12829 instr[4,0] = rt */
12830
12831 uint32_t size = INSTR (31, 31);
12832 uint32_t op = INSTR (24, 24);
12833 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
12834
12835 if (size == 0)
12836 {
12837 if (op == 0)
12838 cbz32 (cpu, offset);
12839 else
12840 cbnz32 (cpu, offset);
12841 }
12842 else
12843 {
12844 if (op == 0)
12845 cbz (cpu, offset);
12846 else
12847 cbnz (cpu, offset);
12848 }
12849 }
12850
12851 static void
12852 dexTestBranchImmediate (sim_cpu *cpu)
12853 {
12854 /* instr[31] = b5 : bit 5 of test bit idx
12855 instr[30,25] = 01 1011
12856 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
12857 instr[23,19] = b40 : bits 4 to 0 of test bit idx
12858 instr[18,5] = simm14 : signed offset counted in words
12859 instr[4,0] = uimm5 */
12860
12861 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
12862 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
12863
12864 NYI_assert (30, 25, 0x1b);
12865
12866 if (INSTR (24, 24) == 0)
12867 tbz (cpu, pos, offset);
12868 else
12869 tbnz (cpu, pos, offset);
12870 }
12871
12872 static void
12873 dexCondBranchImmediate (sim_cpu *cpu)
12874 {
12875 /* instr[31,25] = 010 1010
12876 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
12877 instr[23,5] = simm19 : signed offset counted in words
12878 instr[4] = op0
12879 instr[3,0] = cond */
12880
12881 int32_t offset;
12882 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
12883
12884 NYI_assert (31, 25, 0x2a);
12885
12886 if (op != 0)
12887 HALT_UNALLOC;
12888
12889 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
12890
12891 bcc (cpu, offset, INSTR (3, 0));
12892 }
12893
12894 static void
12895 dexBranchRegister (sim_cpu *cpu)
12896 {
12897 /* instr[31,25] = 110 1011
12898 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
12899 instr[20,16] = op2 : must be 11111
12900 instr[15,10] = op3 : must be 000000
12901 instr[4,0] = op4 : must be 11111. */
12902
12903 uint32_t op = INSTR (24, 21);
12904 uint32_t op2 = INSTR (20, 16);
12905 uint32_t op3 = INSTR (15, 10);
12906 uint32_t op4 = INSTR (4, 0);
12907
12908 NYI_assert (31, 25, 0x6b);
12909
12910 if (op2 != 0x1F || op3 != 0 || op4 != 0)
12911 HALT_UNALLOC;
12912
12913 if (op == 0)
12914 br (cpu);
12915
12916 else if (op == 1)
12917 blr (cpu);
12918
12919 else if (op == 2)
12920 ret (cpu);
12921
12922 else
12923 {
12924 /* ERET and DRPS require 0b11111 in rn = instr [9,5];
12925 anything else is unallocated. */
12926 uint32_t rn = INSTR (9, 5);
12927
12928 if (rn != 0x1f)
12929 HALT_UNALLOC;
12930
12931 if (op == 4 || op == 5)
12932 HALT_NYI;
12933
12934 HALT_UNALLOC;
12935 }
12936 }
12937
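/* A worked encoding example (illustrative; is_plain_ret is a
   hypothetical helper): "ret" assembles to 0xd65f03c0, giving op == 2,
   op2 == 0x1F, op3 == 0, op4 == 0 and rn == 30 (LR), so
   dexBranchRegister dispatches it to ret above.  */

static inline int
is_plain_ret (uint32_t instr)
{
  return instr == 0xd65f03c0;   /* RET with the default rn == LR.  */
}
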
12938 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
12939 but this may not be available. So instead we define the values we need
12940 here. */
12941 #define AngelSVC_Reason_Open 0x01
12942 #define AngelSVC_Reason_Close 0x02
12943 #define AngelSVC_Reason_Write 0x05
12944 #define AngelSVC_Reason_Read 0x06
12945 #define AngelSVC_Reason_IsTTY 0x09
12946 #define AngelSVC_Reason_Seek 0x0A
12947 #define AngelSVC_Reason_FLen 0x0C
12948 #define AngelSVC_Reason_Remove 0x0E
12949 #define AngelSVC_Reason_Rename 0x0F
12950 #define AngelSVC_Reason_Clock 0x10
12951 #define AngelSVC_Reason_Time 0x11
12952 #define AngelSVC_Reason_System 0x12
12953 #define AngelSVC_Reason_Errno 0x13
12954 #define AngelSVC_Reason_GetCmdLine 0x15
12955 #define AngelSVC_Reason_HeapInfo 0x16
12956 #define AngelSVC_Reason_ReportException 0x18
12957 #define AngelSVC_Reason_Elapsed 0x30
12958
12959
12960 static void
12961 handle_halt (sim_cpu *cpu, uint32_t val)
12962 {
12963 uint64_t result = 0;
12964
12965 if (val != 0xf000)
12966 {
12967 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
12968 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
12969 sim_stopped, SIM_SIGTRAP);
12970 }
12971
12972 /* We have encountered an Angel SVC call. See if we can process it. */
12973 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
12974 {
12975 case AngelSVC_Reason_HeapInfo:
12976 {
12977 /* Get the values. */
12978 uint64_t stack_top = aarch64_get_stack_start (cpu);
12979 uint64_t heap_base = aarch64_get_heap_start (cpu);
12980
12981 /* Get the pointer */
12982 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
12983 ptr = aarch64_get_mem_u64 (cpu, ptr);
12984
12985 /* Fill in the memory block. */
12986 /* Start addr of heap. */
12987 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
12988 /* End addr of heap. */
12989 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
12990 /* Lowest stack addr. */
12991 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
12992 /* Initial stack addr. */
12993 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
12994
12995 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
12996 }
12997 break;
12998
12999 case AngelSVC_Reason_Open:
13000 {
13001 /* Get the pointer */
13002 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13003 /* FIXME: For now we just assume that we will only be asked
13004 to open the standard file descriptors. */
13005 static int fd = 0;
13006 result = fd ++;
13007
13008 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13009 }
13010 break;
13011
13012 case AngelSVC_Reason_Close:
13013 {
13014 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13015 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13016 result = 0;
13017 }
13018 break;
13019
13020 case AngelSVC_Reason_Errno:
13021 result = 0;
13022 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13023 break;
13024
13025 case AngelSVC_Reason_Clock:
13026 result =
13027 #ifdef CLOCKS_PER_SEC
13028 (CLOCKS_PER_SEC >= 100)
13029 ? (clock () / (CLOCKS_PER_SEC / 100))
13030 : ((clock () * 100) / CLOCKS_PER_SEC)
13031 #else
13032 /* Presume unix... clock() returns microseconds. */
13033 (clock () / 10000)
13034 #endif
13035 ;
13036 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13037 break;
13038
13039 case AngelSVC_Reason_GetCmdLine:
13040 {
13041 /* Get the pointer */
13042 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13043 ptr = aarch64_get_mem_u64 (cpu, ptr);
13044
13045 /* FIXME: No command line for now. */
13046 aarch64_set_mem_u64 (cpu, ptr, 0);
13047 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13048 }
13049 break;
13050
13051 case AngelSVC_Reason_IsTTY:
13052 result = 1;
13053 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13054 break;
13055
13056 case AngelSVC_Reason_Write:
13057 {
13058 /* Get the pointer */
13059 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13060 /* Get the write control block. */
13061 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13062 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13063 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13064
13065 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13066 PRIx64 " on descriptor %" PRIx64,
13067 len, buf, fd);
13068
13069 if (len > 1280)
13070 {
13071 TRACE_SYSCALL (cpu,
13072 " AngelSVC: Write: Suspiciously long write: %ld",
13073 (long) len);
13074 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13075 sim_stopped, SIM_SIGBUS);
13076 }
13077 else if (fd == 1)
13078 {
13079 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13080 }
13081 else if (fd == 2)
13082 {
13083 TRACE (cpu, 0, "\n");
13084 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13085 (int) len, aarch64_get_mem_ptr (cpu, buf));
13086 TRACE (cpu, 0, "\n");
13087 }
13088 else
13089 {
13090 TRACE_SYSCALL (cpu,
13091 " AngelSVC: Write: Unexpected file handle: %d",
13092 (int) fd);
13093 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13094 sim_stopped, SIM_SIGABRT);
13095 }
13096 }
13097 break;
13098
13099 case AngelSVC_Reason_ReportException:
13100 {
13101 /* Get the pointer */
13102 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13103 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13104 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13105 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13106
13107 TRACE_SYSCALL (cpu,
13108 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13109 type, state);
13110
13111 if (type == 0x20026)
13112 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13113 sim_exited, state);
13114 else
13115 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13116 sim_stopped, SIM_SIGINT);
13117 }
13118 break;
13119
13120 case AngelSVC_Reason_Read:
13121 case AngelSVC_Reason_FLen:
13122 case AngelSVC_Reason_Seek:
13123 case AngelSVC_Reason_Remove:
13124 case AngelSVC_Reason_Time:
13125 case AngelSVC_Reason_System:
13126 case AngelSVC_Reason_Rename:
13127 case AngelSVC_Reason_Elapsed:
13128 default:
13129 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13130 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13131 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13132 sim_stopped, SIM_SIGTRAP);
13133 }
13134
13135 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13136 }
13137
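/* Guest-side view of the parameter block read by the Write case above
   (illustrative only; the struct and its field names are ours, the
   layout simply mirrors the three 64 bit loads): the caller places
   AngelSVC_Reason_Write in W0 and the block's address in X1 before
   executing HLT #0xF000.  */

struct angel_write_block
{
  uint64_t fd;    /* File descriptor to write to.  */
  uint64_t buf;   /* Address of the bytes to write.  */
  uint64_t len;   /* Number of bytes to write.  */
};
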
13138 static void
13139 dexExcpnGen (sim_cpu *cpu)
13140 {
13141 /* instr[31:24] = 11010100
13142 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13143 010 ==> HLT, 101 ==> DBG GEN EXCPN
13144 instr[20,5] = imm16
13145 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13146 instr[1,0] = LL : discriminates opc */
13147
13148 uint32_t opc = INSTR (23, 21);
13149 uint32_t imm16 = INSTR (20, 5);
13150 uint32_t opc2 = INSTR (4, 2);
13151 uint32_t LL;
13152
13153 NYI_assert (31, 24, 0xd4);
13154
13155 if (opc2 != 0)
13156 HALT_UNALLOC;
13157
13158 LL = INSTR (1, 0);
13159
13160 /* We only implement HLT and BRK for now. */
13161 if (opc == 1 && LL == 0)
13162 {
13163 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13164 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13165 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13166 }
13167
13168 if (opc == 2 && LL == 0)
13169 handle_halt (cpu, imm16);
13170
13171 else if (opc == 0 || opc == 5)
13172 HALT_NYI;
13173
13174 else
13175 HALT_UNALLOC;
13176 }
13177
13178 /* Stub for accessing system registers. */
13179
13180 static uint64_t
13181 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13182 unsigned crm, unsigned op2)
13183 {
13184 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13185 /* DCZID_EL0 - the Data Cache Zero ID register.
13186 We do not support DC ZVA at the moment, so
13187 we return a value with the disable bit set.
13188 We implement support for the DCZID register since
13189 it is used by the C library's memset function. */
13190 return ((uint64_t) 1) << 4;
13191
13192 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13193 /* Cache Type Register. */
13194 return 0x80008000UL;
13195
13196 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13197 /* TPIDR_EL0 - thread pointer id. */
13198 return aarch64_get_thread_id (cpu);
13199
13200 if (op1 == 3 && crm == 4 && op2 == 0)
13201 return aarch64_get_FPCR (cpu);
13202
13203 if (op1 == 3 && crm == 4 && op2 == 1)
13204 return aarch64_get_FPSR (cpu);
13205
13206 else if (op1 == 3 && crm == 2 && op2 == 0)
13207 return aarch64_get_CPSR (cpu);
13208
13209 HALT_NYI;
13210 }
13211
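/* Sketch of how a guest would decode the DCZID_EL0 value returned
   above (dczid_zva_prohibited is a hypothetical helper): bit 4 is the
   DZP "prohibited" flag and bits [3:0] give log2 of the zeroing block
   size in words when DC ZVA is permitted.  Because we set bit 4, the
   C library's memset must take its non-ZVA path.  */

static inline int
dczid_zva_prohibited (uint64_t dczid)
{
  return (dczid >> 4) & 1;
}
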
13212 static void
13213 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13214 unsigned crm, unsigned op2, uint64_t val)
13215 {
13216 if (op1 == 3 && crm == 4 && op2 == 0)
13217 aarch64_set_FPCR (cpu, val);
13218
13219 else if (op1 == 3 && crm == 4 && op2 == 1)
13220 aarch64_set_FPSR (cpu, val);
13221
13222 else if (op1 == 3 && crm == 2 && op2 == 0)
13223 aarch64_set_CPSR (cpu, val);
13224
13225 else
13226 HALT_NYI;
13227 }
13228
13229 static void
13230 do_mrs (sim_cpu *cpu)
13231 {
13232 /* instr[31:20] = 1101 0101 0011
13233 instr[19] = op0
13234 instr[18,16] = op1
13235 instr[15,12] = CRn
13236 instr[11,8] = CRm
13237 instr[7,5] = op2
13238 instr[4,0] = Rt */
13239 unsigned sys_op0 = INSTR (19, 19) + 2;
13240 unsigned sys_op1 = INSTR (18, 16);
13241 unsigned sys_crn = INSTR (15, 12);
13242 unsigned sys_crm = INSTR (11, 8);
13243 unsigned sys_op2 = INSTR (7, 5);
13244 unsigned rt = INSTR (4, 0);
13245
13246 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13247 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13248 }
13249
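/* A worked decode (illustrative; mrs_tpidr_el0 is a hypothetical
   helper): "mrs x0, tpidr_el0" assembles to 0xd53bd040, i.e. op0 == 3
   (instr[19] set), op1 == 3, CRn == 13, CRm == 0, op2 == 2 and
   Rt == 0, which system_get maps to aarch64_get_thread_id.  */

static inline uint32_t
mrs_tpidr_el0 (unsigned rt)
{
  return 0xd53bd040 | (rt & 0x1f);   /* MRS Xt, TPIDR_EL0.  */
}
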
13250 static void
13251 do_MSR_immediate (sim_cpu *cpu)
13252 {
13253 /* instr[31:19] = 1101 0101 0000 0
13254 instr[18,16] = op1
13255 instr[15,12] = 0100
13256 instr[11,8] = CRm
13257 instr[7,5] = op2
13258 instr[4,0] = 1 1111 */
13259
13260 unsigned op1 = INSTR (18, 16);
13261 /*unsigned crm = INSTR (11, 8);*/
13262 unsigned op2 = INSTR (7, 5);
13263
13264 NYI_assert (31, 19, 0x1AA0);
13265 NYI_assert (15, 12, 0x4);
13266 NYI_assert (4, 0, 0x1F);
13267
13268 if (op1 == 0)
13269 {
13270 if (op2 == 5)
13271 HALT_NYI; /* set SPSel. */
13272 else
13273 HALT_UNALLOC;
13274 }
13275 else if (op1 == 3)
13276 {
13277 if (op2 == 6)
13278 HALT_NYI; /* set DAIFset. */
13279 else if (op2 == 7)
13280 HALT_NYI; /* set DAIFclr. */
13281 else
13282 HALT_UNALLOC;
13283 }
13284 else
13285 HALT_UNALLOC;
13286 }
13287
13288 static void
13289 do_MSR_reg (sim_cpu *cpu)
13290 {
13291 /* instr[31:20] = 1101 0101 0001
13292 instr[19] = op0
13293 instr[18,16] = op1
13294 instr[15,12] = CRn
13295 instr[11,8] = CRm
13296 instr[7,5] = op2
13297 instr[4,0] = Rt */
13298
13299 unsigned sys_op0 = INSTR (19, 19) + 2;
13300 unsigned sys_op1 = INSTR (18, 16);
13301 unsigned sys_crn = INSTR (15, 12);
13302 unsigned sys_crm = INSTR (11, 8);
13303 unsigned sys_op2 = INSTR (7, 5);
13304 unsigned rt = INSTR (4, 0);
13305
13306 NYI_assert (31, 20, 0xD51);
13307
13308 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13309 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13310 }
13311
13312 static void
13313 do_SYS (sim_cpu *cpu)
13314 {
13315 /* instr[31,19] = 1101 0101 0000 1
13316 instr[18,16] = op1
13317 instr[15,12] = CRn
13318 instr[11,8] = CRm
13319 instr[7,5] = op2
13320 instr[4,0] = Rt */
13321 NYI_assert (31, 19, 0x1AA1);
13322
13323 /* FIXME: For now we just silently accept system ops. */
13324 }
13325
13326 static void
13327 dexSystem (sim_cpu *cpu)
13328 {
13329 /* instr[31:22] = 1101 01010 0
13330 instr[21] = L
13331 instr[20,19] = op0
13332 instr[18,16] = op1
13333 instr[15,12] = CRn
13334 instr[11,8] = CRm
13335 instr[7,5] = op2
13336 instr[4,0] = uimm5 */
13337
13338 /* We are interested in HINT, DSB, DMB and ISB
13339
13340 Hint #0 encodes NOOP (this is the only hint we care about)
13341 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111;
13342 we treat CRm != 0000, or CRm == 0000 with op2 == 000 or op2 > 101, as NOP
13343
13344 DSB, DMB, ISB are data synchronization barrier, data memory
13345 barrier and instruction synchronization barrier, respectively, where
13346
13347 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13348 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13349 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13350 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13351 10 ==> InnerShareable, 11 ==> FullSystem
13352 types : 01 ==> Reads, 10 ==> Writes,
13353 11 ==> All, 00 ==> All (domain == FullSystem). */
13354
13355 unsigned rt = INSTR (4, 0);
13356
13357 NYI_assert (31, 22, 0x354);
13358
13359 switch (INSTR (21, 12))
13360 {
13361 case 0x032:
13362 if (rt == 0x1F)
13363 {
13364 /* Treat as NOP when CRm != 0000, or when
13365 CRm == 0000 AND (op2 == 000 OR op2 > 101). */
13366 uint32_t crm = INSTR (11, 8);
13367 uint32_t op2 = INSTR (7, 5);
13368
13369 if (crm != 0 || (op2 == 0 || op2 > 5))
13370 {
13371 /* Actually call nop method so we can reimplement it later. */
13372 nop (cpu);
13373 return;
13374 }
13375 }
13376 HALT_NYI;
13377
13378 case 0x033:
13379 {
13380 uint32_t op2 = INSTR (7, 5);
13381
13382 switch (op2)
13383 {
13384 case 2: HALT_NYI;
13385 case 4: dsb (cpu); return;
13386 case 5: dmb (cpu); return;
13387 case 6: isb (cpu); return;
13388 default: HALT_UNALLOC;
13389 }
13390 }
13391
13392 case 0x3B0:
13393 case 0x3B4:
13394 case 0x3BD:
13395 do_mrs (cpu);
13396 return;
13397
13398 case 0x0B7:
13399 do_SYS (cpu); /* DC is an alias of SYS. */
13400 return;
13401
13402 default:
13403 if (INSTR (21, 20) == 0x1)
13404 do_MSR_reg (cpu);
13405 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13406 do_MSR_immediate (cpu);
13407 else
13408 HALT_NYI;
13409 return;
13410 }
13411 }
13412
13413 static void
13414 dexBr (sim_cpu *cpu)
13415 {
13416 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13417 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13418 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13419 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13420
13421 switch (group2)
13422 {
13423 case BR_IMM_000:
13424 return dexBranchImmediate (cpu);
13425
13426 case BR_IMMCMP_001:
13427 /* Compare has bit 25 clear while test has it set. */
13428 if (!INSTR (25, 25))
13429 dexCompareBranchImmediate (cpu);
13430 else
13431 dexTestBranchImmediate (cpu);
13432 return;
13433
13434 case BR_IMMCOND_010:
13435 /* This is a conditional branch if bit 25 is clear otherwise
13436 unallocated. */
13437 if (!INSTR (25, 25))
13438 dexCondBranchImmediate (cpu);
13439 else
13440 HALT_UNALLOC;
13441 return;
13442
13443 case BR_UNALLOC_011:
13444 HALT_UNALLOC;
13445
13446 case BR_IMM_100:
13447 dexBranchImmediate (cpu);
13448 return;
13449
13450 case BR_IMMCMP_101:
13451 /* Compare has bit 25 clear while test has it set. */
13452 if (!INSTR (25, 25))
13453 dexCompareBranchImmediate (cpu);
13454 else
13455 dexTestBranchImmediate (cpu);
13456 return;
13457
13458 case BR_REG_110:
13459 /* Unconditional branch reg has bit 25 set. */
13460 if (INSTR (25, 25))
13461 dexBranchRegister (cpu);
13462
13463 /* This includes Excpn Gen, System and unalloc operations.
13464 We need to decode the Excpn Gen operation BRK so we can plant
13465 debugger entry points.
13466 Excpn Gen operations have instr [24] = 0.
13467 We need to decode at least one of the System operations, NOP,
13468 which is an alias for HINT #0.
13469 System operations have instr [24,22] = 100. */
13470 else if (INSTR (24, 24) == 0)
13471 dexExcpnGen (cpu);
13472
13473 else if (INSTR (24, 22) == 4)
13474 dexSystem (cpu);
13475
13476 else
13477 HALT_UNALLOC;
13478
13479 return;
13480
13481 case BR_UNALLOC_111:
13482 HALT_UNALLOC;
13483
13484 default:
13485 /* Should never reach here. */
13486 HALT_NYI;
13487 }
13488 }
13489
13490 static void
13491 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13492 {
13493 /* We need to check if gdb wants an interrupt in here. */
13494 /* checkBreak (cpu); */
13495
13496 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13497
13498 switch (group)
13499 {
13500 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
13501 case GROUP_LDST_0100: dexLdSt (cpu); break;
13502 case GROUP_DPREG_0101: dexDPReg (cpu); break;
13503 case GROUP_LDST_0110: dexLdSt (cpu); break;
13504 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
13505 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
13506 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
13507 case GROUP_BREXSYS_1010: dexBr (cpu); break;
13508 case GROUP_BREXSYS_1011: dexBr (cpu); break;
13509 case GROUP_LDST_1100: dexLdSt (cpu); break;
13510 case GROUP_DPREG_1101: dexDPReg (cpu); break;
13511 case GROUP_LDST_1110: dexLdSt (cpu); break;
13512 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
13513
13514 case GROUP_UNALLOC_0001:
13515 case GROUP_UNALLOC_0010:
13516 case GROUP_UNALLOC_0011:
13517 HALT_UNALLOC;
13518
13519 default:
13520 /* Should never reach here. */
13521 HALT_NYI;
13522 }
13523 }
13524
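/* For illustration (assuming dispatchGroup extracts instr[28,25], as
   the GROUP_* mnemonic suffixes suggest): an unconditional branch such
   as "b ." (0x14000000) has instr[28,25] == 1010, so it reaches dexBr
   via GROUP_BREXSYS_1010.  top_level_group is a hypothetical helper.  */

static inline uint32_t
top_level_group (uint32_t instr)
{
  return (instr >> 25) & 0xF;   /* instr[28,25].  */
}
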
13525 static bfd_boolean
13526 aarch64_step (sim_cpu *cpu)
13527 {
13528 uint64_t pc = aarch64_get_PC (cpu);
13529
13530 if (pc == TOP_LEVEL_RETURN_PC)
13531 return FALSE;
13532
13533 aarch64_set_next_PC (cpu, pc + 4);
13534 aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
13535
13536 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
13537 aarch64_get_instr (cpu));
13538 TRACE_DISASM (cpu, pc);
13539
13540 aarch64_decode_and_execute (cpu, pc);
13541
13542 return TRUE;
13543 }
13544
13545 void
13546 aarch64_run (SIM_DESC sd)
13547 {
13548 sim_cpu *cpu = STATE_CPU (sd, 0);
13549
13550 while (aarch64_step (cpu))
13551 aarch64_update_PC (cpu);
13552
13553 sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
13554 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13555 }
13556
13557 void
13558 aarch64_init (sim_cpu *cpu, uint64_t pc)
13559 {
13560 uint64_t sp = aarch64_get_stack_start (cpu);
13561
13562 /* Install SP, FP and PC and set LR to -20
13563 so we can detect a top-level return. */
13564 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
13565 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
13566 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
13567 aarch64_set_next_PC (cpu, pc);
13568 aarch64_update_PC (cpu);
13569 aarch64_init_LIT_table ();
13570 }