/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2017 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC                                                    \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unallocated instruction detected at sim line %d,"    \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGILL);                        \
    }                                                                   \
  while (0)

#define HALT_NYI                                                        \
  do                                                                    \
    {                                                                   \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));                         \
      TRACE_INSN (cpu,                                                  \
                  "Unimplemented instruction detected at sim line %d,"  \
                  " exe addr %" PRIx64,                                 \
                  __LINE__, aarch64_get_PC (cpu));                      \
      if (! TRACE_ANY_P (cpu))                                          \
        sim_io_eprintf (CPU_STATE (cpu),                                \
                        "SIM Error: Unimplemented instruction: %#08x\n",\
                        aarch64_get_instr (cpu));                       \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
                       sim_stopped, SIM_SIGABRT);                       \
    }                                                                   \
  while (0)

#define NYI_assert(HI, LO, EXPECTED)                                    \
  do                                                                    \
    {                                                                   \
      if (INSTR ((HI), (LO)) != (EXPECTED))                             \
        HALT_NYI;                                                       \
    }                                                                   \
  while (0)

/* Helper functions used by expand_logical_immediate.  */

/* For i = 1, ..., N set result<i-1> = 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t)-1ULL : ((1ULL << N) - 1));
}

/* result<0> = val<N>; all other bits are zero.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
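
/* A worked example may help here.  Take N = 0, imms (S) = 0x03 and
   immr (R) = 0x01; the arithmetic below is hand-computed and is meant
   as an illustration, not as normative output:

     S = 0x03 lies in 0x00...0x1f  =>  simd_size = 32, mask = 0xffffffff
     imm = (1 << (S + 1)) - 1                       =>  0x0000000f
     imm = ((imm << (32 - 1)) & mask) | (imm >> 1)  =>  0x80000007
     replicate 32 bits to 64                        =>  0x8000000780000007

   i.e. expand_logical_immediate (0x03, 0x01, 0) yields four set bits,
   right rotated by one, replicated into both 32-bit lanes.  */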

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits' worth of entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
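
/* A decoder can then resolve a logical immediate with a single lookup.
   The sketch below is illustrative only; the decode routines index the
   table with the same instr[22,10] slice:

     uint64_t bimm = LITable [INSTR (22, 10)];
     if (bimm == 0)
       HALT_UNALLOC;      (zero marks an invalid encoding)

   A zero entry is a safe "invalid" marker because no valid logical
   immediate expands to zero.  */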

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
                                    aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
                                      aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 2:
      /* aarch64_notifyMethodExit (); */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
                                aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* A (uint32_t) cast here would zero-extend; LDURSW must sign-extend
     the loaded word into the 64-bit destination.  */
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size in bits: 16,
   32, 64 or 128.  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the offset is shifted left by the
   scale amount; when it is Unscaled no shift is applied.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
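
/* For instance, assuming the ScaleShift constants follow the usual
   log2-of-element-size pattern (ScaleShift32 == 2, ScaleShift64 == 3),
   an immediate of 3 scales as follows:

     SCALE (3, 32)               =>  3 << 2  ==  12  (byte offset)
     SCALE (3, 64)               =>  3 << 3  ==  24
     OPT_SCALE (3, 64, Scaled)   =>  24
     OPT_SCALE (3, 64, Unscaled) =>  3

   i.e. a scaled immediate counts elements rather than bytes.  */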

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
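
/* For example, extending the 32-bit pattern 0xfffffffc (-4 as a
   signed value) shows the difference between the two modes:

     extend (0xfffffffc, UXTW)  =>  0x00000000fffffffc
     extend (0xfffffffc, SXTW)  =>  0xfffffffffffffffc  (i.e. -4)

   so a negative index register only steps backwards through memory
   under SXTW.  */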

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
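
/* The writeback handling above is shared by all of the _wb loads and
   stores below and is easiest to see as a table (offset applied before
   or after the access; base register updated or not):

     wb == NoWriteBack :  access base + offset,  base unchanged
     wb == Pre         :  access base + offset,  base := base + offset
     wb == Post        :  access base,           base := base + offset

   matching the architectural pre- and post-index addressing forms.  */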

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* An 8 bit load must read 8 bits, not 32.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode,
   as sketched below for the 32-bit integer load.  */
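
/* Concretely, for the plain 32-bit integer load the four modes map to
   the handlers that follow; the assembly forms shown are illustrative,
   and the same _abs / _wb / _scale_ext naming pattern repeats for
   every access size below:

     LDR w1, [x2, #imm]         =>  ldr32_abs        (scaled unsigned 12 bit)
     LDR w1, [x2, #imm]!        =>  ldr32_wb, Pre    (unscaled signed 9 bit)
     LDR w1, [x2], #imm         =>  ldr32_wb, Post
     LDR w1, [x2, w3, SXTW #2]  =>  ldr32_scale_ext  (extended register)  */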

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15); */
  /* int exclusive = ! INSTR (23, 23); */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always succeeds: the
                                              exclusive monitor is not
                                              modelled.  */
}
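
/* Because the exclusive monitor is not modelled, stxr reports success
   (status 0) unconditionally.  A standard atomic-increment retry loop
   such as

     retry:
       ldxr  x1, [x0]
       add   x1, x1, #1
       stxr  w2, x1, [x0]
       cbnz  w2, retry

   therefore always exits on its first iteration under the simulator;
   that is adequate for single-threaded code but cannot detect a lost
   reservation.  */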

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30] : instr[26] = opc : V
                    000 ==> LDRW,  001 ==> FLDRS
                    010 ==> LDRX,  011 ==> FLDRD
                    100 ==> LDRSW, 101 ==> FLDRQ
                    110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23,5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
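
/* As a worked example (hand-assembled, so treat the encoding as
   illustrative): the A64 word 0x58000040 is LDR x0, #8, a 64-bit
   literal load:

     instr[31,30] = 01, instr[26] = 0  =>  dispatch = (1 << 1) | 0 = 2
     instr[23,5]  = simm19 = 2         =>  imm = 2 words

   so dexLoadLiteral calls ldr_pcrel (cpu, 2), reading the doubleword
   at PC + 8 into x0.  */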

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* Carry out of bit 31: compare against the zero-extended 32-bit
     result; comparing against the sign-extended result would wrongly
     set C for e.g. 0x7fffffff + 1.  */
  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
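
/* A quick sanity check of the rules above (values hand-computed):
   adding 1 to INT32_MAX overflows the signed range without producing
   an unsigned carry, so

     set_flags_for_add32 (cpu, 0x7fffffff, 1)

   leaves result = 0x80000000 with N and V set, C and Z clear, while
   0xffffffff + 1 wraps to zero and sets Z and C with N and V clear.  */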

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
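
/* Note that the subtraction rules implement the AArch64 "carry means
   no borrow" convention: comparing equal values sets both Z and C.
   Hand-checking against the predicates above:

     set_flags_for_sub64 (cpu, 5, 5)  =>  Z and C set, N and V clear
     set_flags_for_sub64 (cpu, 4, 5)  =>  N set, C clear (borrow)

   which is exactly what a CMP followed by B.HS or B.LO expects.  */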
1739
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1 << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 {
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1880 }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 {
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1904 }
1905 }
1906 }
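/* A worked example of the ROR arm above, for shifted32 with count 1:
     value  = 0x80000001
     top    = value >> 1        = 0x40000000
     bottom = value << (32 - 1) = 0x80000000
     result = bottom | top      = 0xC0000000
   Note that the ROR arm assumes a non-zero count; a count of 0 would
   shift by the full register width, which C leaves undefined.  */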
1907
1908 /* Arithmetic shifted register.
1909    These allow an optional LSL, ASR or LSR shift to be applied to the
1910    second source register, with a count of up to the register bit count.
1911
1912    N.B. register args may not be SP.  */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* First a couple more helpers to fetch the
2043    relevant source register element, either
2044    sign- or zero-extended as required by the
2045    extension value.  */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
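/* Each Extension case above is just a truncate-then-widen cast chain.
   For instance SXTB on a 64-bit operand behaves like the expression
   below (a sketch, not used by the decoder; the helper name is
   illustrative only):  */
#if 0
static uint64_t
sxtb64_sketch (uint64_t reg)
{
  /* Keep bits [7,0] and sign-extend them to 64 bits.  */
  return (uint64_t) (int64_t) (int8_t) reg;
}
#endif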
2080
2081 /* Arithmetic extending register
2082    These allow an optional sign extension of some portion of the
2083    second source register followed by an optional left shift of
2084    between 0 and 4 bits.
2085
2086    N.B. output (dest) and first input arg (source) may normally be Xn
2087    or SP.  However, for flag setting operations dest can only be
2088    Xn.  Second input registers are always Xn.  */
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221     instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
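/* A worked decode example: 0x11002841 is "add w1, w2, #10".
   Bits [31,29] = 000 select the 32-bit, non-flag-setting ADD,
   [28,24] = 10001, shift [23,22] = 00 (LSL #0), uimm12 [21,10] = 10,
   Rn [9,5] = 2 and Rd [4,0] = 1, so the dispatch above runs
   add32 (cpu, 10).  */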
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281   /* Dispatch on size:op, i.e. instr [31,29].  */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
2305     instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306                             010 ==> LSL|UXTW, 011 ==> UXTX,
2307                             100 ==> SXTB, 101 ==> SXTH,
2308                             110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B. register args may not be SP.  */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447   uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465   uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
2480      instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
2508   /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
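/* One possible branchless reduction, as suggested above: condition
   bits [3:1] pick a base predicate and bit 0 inverts it, except for
   111x (AL/NV) which always passes.  A sketch only, kept out of the
   build:  */
#if 0
static uint32_t
testConditionCode_branchless (sim_cpu *cpu, CondCode cc)
{
  uint32_t n = IS_SET (N), z = IS_SET (Z), c = IS_SET (C), v = IS_SET (V);
  uint32_t base[8] =
    {
      z,			/* EQ / NE  */
      c,			/* CS / CC  */
      n,			/* MI / PL  */
      v,			/* VS / VC  */
      c & (z ^ 1),		/* HI / LS  */
      (n ^ v) ^ 1,		/* GE / LT  */
      ((n ^ v) | z) ^ 1,	/* GT / LE  */
      1				/* AL / NV  */
    };

  return base[cc >> 1] ^ ((cc & 1) & ((cc >> 1) != 7));
}
#endif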
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_MOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,18] = element size and index
2629 instr[17,10] = 00 0011 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635
2636 NYI_assert (29, 21, 0x070);
2637 NYI_assert (17, 10, 0x0F);
2638
2639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2640 switch (INSTR (20, 18))
2641 {
2642 case 0x2:
2643 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2644 break;
2645
2646 case 0x6:
2647 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2648 break;
2649
2650 case 0x1:
2651 case 0x3:
2652 case 0x5:
2653 case 0x7:
2654 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2655 (cpu, vs, INSTR (20, 19)));
2656 break;
2657
2658 default:
2659 HALT_NYI;
2660 }
2661 }
2662
2663 static void
2664 do_vec_INS (sim_cpu *cpu)
2665 {
2666 /* instr[31,21] = 01001110000
2667 instr[20,16] = element size and index
2668 instr[15,10] = 000111
2669 instr[9,5] = W source
2670 instr[4,0] = V dest */
2671
2672 int index;
2673 unsigned rs = INSTR (9, 5);
2674 unsigned vd = INSTR (4, 0);
2675
2676 NYI_assert (31, 21, 0x270);
2677 NYI_assert (15, 10, 0x07);
2678
2679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2680 if (INSTR (16, 16))
2681 {
2682 index = INSTR (20, 17);
2683 aarch64_set_vec_u8 (cpu, vd, index,
2684 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2685 }
2686 else if (INSTR (17, 17))
2687 {
2688 index = INSTR (20, 18);
2689 aarch64_set_vec_u16 (cpu, vd, index,
2690 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2691 }
2692 else if (INSTR (18, 18))
2693 {
2694 index = INSTR (20, 19);
2695 aarch64_set_vec_u32 (cpu, vd, index,
2696 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2697 }
2698 else if (INSTR (19, 19))
2699 {
2700 index = INSTR (20, 20);
2701 aarch64_set_vec_u64 (cpu, vd, index,
2702 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2703 }
2704 else
2705 HALT_NYI;
2706 }
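/* The if-chain above decodes the imm5 field at [20,16] by its lowest
   set bit: imm5 = xxxx1 selects byte lanes with index imm5[4:1],
   imm5 = xxx10 halfword lanes with index imm5[4:2], imm5 = xx100 word
   lanes with index imm5[4:3], and imm5 = x1000 doubleword lanes with
   index imm5[4].  For example "ins v0.s[3], w1" has imm5 = 11100, so
   bit 18 is the lowest set bit and the index is INSTR (20, 19) = 3.  */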
2707
2708 static void
2709 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2710 {
2711 /* instr[31] = 0
2712 instr[30] = half(0)/full(1)
2713 instr[29,21] = 00 1110 000
2714 instr[20,16] = element size and index
2715 instr[15,10] = 0000 01
2716 instr[9,5] = V source
2717 instr[4,0] = V dest. */
2718
2719 unsigned full = INSTR (30, 30);
2720 unsigned vs = INSTR (9, 5);
2721 unsigned vd = INSTR (4, 0);
2722 int i, index;
2723
2724 NYI_assert (29, 21, 0x070);
2725 NYI_assert (15, 10, 0x01);
2726
2727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2728 if (INSTR (16, 16))
2729 {
2730 index = INSTR (20, 17);
2731
2732 for (i = 0; i < (full ? 16 : 8); i++)
2733 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2734 }
2735 else if (INSTR (17, 17))
2736 {
2737 index = INSTR (20, 18);
2738
2739 for (i = 0; i < (full ? 8 : 4); i++)
2740 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2741 }
2742 else if (INSTR (18, 18))
2743 {
2744 index = INSTR (20, 19);
2745
2746 for (i = 0; i < (full ? 4 : 2); i++)
2747 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2748 }
2749 else
2750 {
2751 if (INSTR (19, 19) == 0)
2752 HALT_UNALLOC;
2753
2754 if (! full)
2755 HALT_UNALLOC;
2756
2757 index = INSTR (20, 20);
2758
2759 for (i = 0; i < 2; i++)
2760 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2761 }
2762 }
2763
2764 static void
2765 do_vec_TBL (sim_cpu *cpu)
2766 {
2767 /* instr[31] = 0
2768 instr[30] = half(0)/full(1)
2769 instr[29,21] = 00 1110 000
2770 instr[20,16] = Vm
2771 instr[15] = 0
2772 instr[14,13] = vec length
2773 instr[12,10] = 000
2774 instr[9,5] = V start
2775 instr[4,0] = V dest */
2776
2777 int full = INSTR (30, 30);
2778 int len = INSTR (14, 13) + 1;
2779 unsigned vm = INSTR (20, 16);
2780 unsigned vn = INSTR (9, 5);
2781 unsigned vd = INSTR (4, 0);
2782 unsigned i;
2783
2784 NYI_assert (29, 21, 0x070);
2785 NYI_assert (12, 10, 0);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 for (i = 0; i < (full ? 16 : 8); i++)
2789 {
2790 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2791 uint8_t val;
2792
2793 if (selector < 16)
2794 val = aarch64_get_vec_u8 (cpu, vn, selector);
2795 else if (selector < 32)
2796 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2797 else if (selector < 48)
2798 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2799 else if (selector < 64)
2800 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2801 else
2802 val = 0;
2803
2804 aarch64_set_vec_u8 (cpu, vd, i, val);
2805 }
2806 }
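/* A worked example: with len == 1 only Vn participates, so for
   "tbl v0.8b, {v1.16b}, v2.8b" a selector byte of 5 fetches byte 5 of
   v1, while any selector >= 16 falls into the len < 2 arm above and
   yields 0, giving TBL's out-of-range zeroing behaviour.  */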
2807
2808 static void
2809 do_vec_TRN (sim_cpu *cpu)
2810 {
2811 /* instr[31] = 0
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2814 instr[23,22] = size
2815 instr[21] = 0
2816 instr[20,16] = Vm
2817 instr[15] = 0
2818 instr[14] = TRN1 (0) / TRN2 (1)
2819 instr[13,10] = 1010
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2822
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 unsigned i;
2829
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2835 {
2836 case 0:
2837 for (i = 0; i < (full ? 8 : 4); i++)
2838 {
2839 aarch64_set_vec_u8
2840 (cpu, vd, i * 2,
2841 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2842 aarch64_set_vec_u8
2843 	    (cpu, vd, i * 2 + 1,
2844 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2845 }
2846 break;
2847
2848 case 1:
2849 for (i = 0; i < (full ? 4 : 2); i++)
2850 {
2851 aarch64_set_vec_u16
2852 (cpu, vd, i * 2,
2853 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2854 aarch64_set_vec_u16
2855 	    (cpu, vd, i * 2 + 1,
2856 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2857 }
2858 break;
2859
2860 case 2:
2861 aarch64_set_vec_u32
2862 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2863 aarch64_set_vec_u32
2864 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2865 aarch64_set_vec_u32
2866 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2867 aarch64_set_vec_u32
2868 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2869 break;
2870
2871 case 3:
2872 if (! full)
2873 HALT_UNALLOC;
2874
2875 aarch64_set_vec_u64 (cpu, vd, 0,
2876 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2877 aarch64_set_vec_u64 (cpu, vd, 1,
2878 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2879 break;
2880 }
2881 }
2882
2883 static void
2884 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2885 {
2886 /* instr[31] = 0
2887 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2888 [must be 1 for 64-bit xfer]
2889 instr[29,20] = 00 1110 0000
2890 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2891 0100=> 32-bits. 1000=>64-bits
2892 instr[15,10] = 0000 11
2893 instr[9,5] = W source
2894 instr[4,0] = V dest. */
2895
2896 unsigned i;
2897 unsigned Vd = INSTR (4, 0);
2898 unsigned Rs = INSTR (9, 5);
2899 int both = INSTR (30, 30);
2900
2901 NYI_assert (29, 20, 0x0E0);
2902 NYI_assert (15, 10, 0x03);
2903
2904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2905 switch (INSTR (19, 16))
2906 {
2907 case 1:
2908 for (i = 0; i < (both ? 16 : 8); i++)
2909 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2910 break;
2911
2912 case 2:
2913 for (i = 0; i < (both ? 8 : 4); i++)
2914 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2915 break;
2916
2917 case 4:
2918 for (i = 0; i < (both ? 4 : 2); i++)
2919 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2920 break;
2921
2922 case 8:
2923 if (!both)
2924 HALT_NYI;
2925 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2926 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2927 break;
2928
2929 default:
2930 HALT_NYI;
2931 }
2932 }
2933
2934 static void
2935 do_vec_UZP (sim_cpu *cpu)
2936 {
2937 /* instr[31] = 0
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2941 instr[21] = 0
2942 instr[20,16] = Vm
2943 instr[15] = 0
2944 instr[14] = lower (0) / upper (1)
2945 instr[13,10] = 0110
2946 instr[9,5] = Vn
2947 instr[4,0] = Vd. */
2948
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2951
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2955
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2960
2961 uint64_t val1;
2962 uint64_t val2;
2963
2964 uint64_t input2 = full ? val_n2 : val_m1;
2965
2966 NYI_assert (29, 24, 0x0E);
2967 NYI_assert (21, 21, 0);
2968 NYI_assert (15, 15, 0);
2969 NYI_assert (13, 10, 6);
2970
2971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2972 switch (INSTR (23, 22))
2973 {
2974 case 0:
2975 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
2976 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2977 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2978 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2979
2980 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2981 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2982 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2983 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2984
2985 if (full)
2986 {
2987 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
2988 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2989 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2990 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2991
2992 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2993 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2994 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2995 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2996 }
2997 break;
2998
2999 case 1:
3000 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3001 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3002
3003       val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3004 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3005
3006 if (full)
3007 {
3008 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3009 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3010
3011 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3012 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3013 }
3014 break;
3015
3016 case 2:
3017 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3018 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3019
3020 if (full)
3021 {
3022 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3023 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3024 }
3025 break;
3026
3027 case 3:
3028 if (! full)
3029 HALT_UNALLOC;
3030
3031 val1 = upper ? val_n2 : val_n1;
3032 val2 = upper ? val_m2 : val_m1;
3033 break;
3034 }
3035
3036 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3037 if (full)
3038 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3039 }
3040
3041 static void
3042 do_vec_ZIP (sim_cpu *cpu)
3043 {
3044 /* instr[31] = 0
3045 instr[30] = half(0)/full(1)
3046 instr[29,24] = 00 1110
3047     instr[23,22] = size: byte(00), half(01), word (10), long (11)
3048 instr[21] = 0
3049 instr[20,16] = Vm
3050 instr[15] = 0
3051 instr[14] = lower (0) / upper (1)
3052 instr[13,10] = 1110
3053 instr[9,5] = Vn
3054 instr[4,0] = Vd. */
3055
3056 int full = INSTR (30, 30);
3057 int upper = INSTR (14, 14);
3058
3059 unsigned vm = INSTR (20, 16);
3060 unsigned vn = INSTR (9, 5);
3061 unsigned vd = INSTR (4, 0);
3062
3063 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3064 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3065 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3066 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3067
3068 uint64_t val1 = 0;
3069 uint64_t val2 = 0;
3070
3071   uint64_t input1 = upper ? val_n2 : val_n1;
3072   uint64_t input2 = upper ? val_m2 : val_m1;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 0xE);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3080   switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 =
3084 ((input1 << 0) & (0xFF << 0))
3085 | ((input2 << 8) & (0xFF << 8))
3086 | ((input1 << 8) & (0xFF << 16))
3087 	| ((input2 << 16) & (0xFFULL << 24))
3088 | ((input1 << 16) & (0xFFULL << 32))
3089 | ((input2 << 24) & (0xFFULL << 40))
3090 | ((input1 << 24) & (0xFFULL << 48))
3091 | ((input2 << 32) & (0xFFULL << 56));
3092
3093 val2 =
3094 ((input1 >> 32) & (0xFF << 0))
3095 | ((input2 >> 24) & (0xFF << 8))
3096 | ((input1 >> 24) & (0xFF << 16))
3097 	| ((input2 >> 16) & (0xFFULL << 24))
3098 | ((input1 >> 16) & (0xFFULL << 32))
3099 | ((input2 >> 8) & (0xFFULL << 40))
3100 | ((input1 >> 8) & (0xFFULL << 48))
3101 | ((input2 >> 0) & (0xFFULL << 56));
3102 break;
3103
3104 case 1:
3105 val1 =
3106 ((input1 << 0) & (0xFFFF << 0))
3107 	| ((input2 << 16) & (0xFFFFULL << 16))
3108 | ((input1 << 16) & (0xFFFFULL << 32))
3109 | ((input2 << 32) & (0xFFFFULL << 48));
3110
3111 val2 =
3112 ((input1 >> 32) & (0xFFFF << 0))
3113 	| ((input2 >> 16) & (0xFFFFULL << 16))
3114 | ((input1 >> 16) & (0xFFFFULL << 32))
3115 | ((input2 >> 0) & (0xFFFFULL << 48));
3116 break;
3117
3118 case 2:
3119 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3120       val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3121 break;
3122
3123 case 3:
3124 val1 = input1;
3125 val2 = input2;
3126 break;
3127 }
3128
3129 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3130 if (full)
3131 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3132 }
3133
3134 /* Floating point immediates are encoded in 8 bits.
3135 fpimm[7] = sign bit.
3136 fpimm[6:4] = signed exponent.
3137 fpimm[3:0] = fraction (assuming leading 1).
3138 i.e. F = s * 1.f * 2^(e - b). */
3139
3140 static float
3141 fp_immediate_for_encoding_32 (uint32_t imm8)
3142 {
3143 float u;
3144 uint32_t s, e, f, i;
3145
3146 s = (imm8 >> 7) & 0x1;
3147 e = (imm8 >> 4) & 0x7;
3148 f = imm8 & 0xf;
3149
3150   /* The fp value is s * n/16 * 2^r where n is 16+f.  */
3151 u = (16.0 + f) / 16.0;
3152
3153 /* N.B. exponent is signed. */
3154 if (e < 4)
3155 {
3156 int epos = e;
3157
3158 for (i = 0; i <= epos; i++)
3159 u *= 2.0;
3160 }
3161 else
3162 {
3163 int eneg = 7 - e;
3164
3165 for (i = 0; i < eneg; i++)
3166 u /= 2.0;
3167 }
3168
3169 if (s)
3170 u = - u;
3171
3172 return u;
3173 }
3174
3175 static double
3176 fp_immediate_for_encoding_64 (uint32_t imm8)
3177 {
3178 double u;
3179 uint32_t s, e, f, i;
3180
3181 s = (imm8 >> 7) & 0x1;
3182 e = (imm8 >> 4) & 0x7;
3183 f = imm8 & 0xf;
3184
3185   /* The fp value is s * n/16 * 2^r where n is 16+f.  */
3186 u = (16.0 + f) / 16.0;
3187
3188 /* N.B. exponent is signed. */
3189 if (e < 4)
3190 {
3191 int epos = e;
3192
3193 for (i = 0; i <= epos; i++)
3194 u *= 2.0;
3195 }
3196 else
3197 {
3198 int eneg = 7 - e;
3199
3200 for (i = 0; i < eneg; i++)
3201 u /= 2.0;
3202 }
3203
3204 if (s)
3205 u = - u;
3206
3207 return u;
3208 }
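/* The two loops above are equivalent to this closed form (a sketch,
   not compiled in): the effective exponent is e + 1 for e < 4 and
   e - 7 otherwise, so imm8 0x00 decodes to 2.0 and imm8 0x70 decodes
   to 1.0.  */
#if 0
static double
fp8_closed_form (uint32_t imm8)
{
  uint32_t s = (imm8 >> 7) & 0x1;
  int      e = (imm8 >> 4) & 0x7;
  uint32_t f = imm8 & 0xf;
  double   u = ldexp ((16.0 + f) / 16.0, e < 4 ? e + 1 : e - 7);

  return s ? - u : u;
}
#endif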
3209
3210 static void
3211 do_vec_MOV_immediate (sim_cpu *cpu)
3212 {
3213 /* instr[31] = 0
3214 instr[30] = full/half selector
3215 instr[29,19] = 00111100000
3216 instr[18,16] = high 3 bits of uimm8
3217 instr[15,12] = size & shift:
3218 0000 => 32-bit
3219 0010 => 32-bit + LSL#8
3220 0100 => 32-bit + LSL#16
3221 0110 => 32-bit + LSL#24
3222 1010 => 16-bit + LSL#8
3223 1000 => 16-bit
3224 1101 => 32-bit + MSL#16
3225 1100 => 32-bit + MSL#8
3226 1110 => 8-bit
3227 1111 => double
3228 instr[11,10] = 01
3229 instr[9,5] = low 5-bits of uimm8
3230 instr[4,0] = Vd. */
3231
3232 int full = INSTR (30, 30);
3233 unsigned vd = INSTR (4, 0);
3234 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3235 unsigned i;
3236
3237 NYI_assert (29, 19, 0x1E0);
3238 NYI_assert (11, 10, 1);
3239
3240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3241 switch (INSTR (15, 12))
3242 {
3243 case 0x0: /* 32-bit, no shift. */
3244 case 0x2: /* 32-bit, shift by 8. */
3245 case 0x4: /* 32-bit, shift by 16. */
3246 case 0x6: /* 32-bit, shift by 24. */
3247 val <<= (8 * INSTR (14, 13));
3248 for (i = 0; i < (full ? 4 : 2); i++)
3249 aarch64_set_vec_u32 (cpu, vd, i, val);
3250 break;
3251
3252 case 0xa: /* 16-bit, shift by 8. */
3253 val <<= 8;
3254 /* Fall through. */
3255 case 0x8: /* 16-bit, no shift. */
3256 for (i = 0; i < (full ? 8 : 4); i++)
3257 aarch64_set_vec_u16 (cpu, vd, i, val);
3258 break;
3259
3260 case 0xd: /* 32-bit, mask shift by 16. */
3261 val <<= 8;
3262 val |= 0xFF;
3263 /* Fall through. */
3264 case 0xc: /* 32-bit, mask shift by 8. */
3265 val <<= 8;
3266 val |= 0xFF;
3267 for (i = 0; i < (full ? 4 : 2); i++)
3268 aarch64_set_vec_u32 (cpu, vd, i, val);
3269 break;
3270
3271 case 0xe: /* 8-bit, no shift. */
3272 for (i = 0; i < (full ? 16 : 8); i++)
3273 aarch64_set_vec_u8 (cpu, vd, i, val);
3274 break;
3275
3276 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3277 {
3278 float u = fp_immediate_for_encoding_32 (val);
3279 for (i = 0; i < (full ? 4 : 2); i++)
3280 aarch64_set_vec_float (cpu, vd, i, u);
3281 break;
3282 }
3283
3284 default:
3285 HALT_NYI;
3286 }
3287 }
3288
3289 static void
3290 do_vec_MVNI (sim_cpu *cpu)
3291 {
3292 /* instr[31] = 0
3293 instr[30] = full/half selector
3294 instr[29,19] = 10111100000
3295 instr[18,16] = high 3 bits of uimm8
3296 instr[15,12] = selector
3297 instr[11,10] = 01
3298 instr[9,5] = low 5-bits of uimm8
3299 instr[4,0] = Vd. */
3300
3301 int full = INSTR (30, 30);
3302 unsigned vd = INSTR (4, 0);
3303 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3304 unsigned i;
3305
3306 NYI_assert (29, 19, 0x5E0);
3307 NYI_assert (11, 10, 1);
3308
3309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3310 switch (INSTR (15, 12))
3311 {
3312 case 0x0: /* 32-bit, no shift. */
3313 case 0x2: /* 32-bit, shift by 8. */
3314 case 0x4: /* 32-bit, shift by 16. */
3315 case 0x6: /* 32-bit, shift by 24. */
3316 val <<= (8 * INSTR (14, 13));
3317 val = ~ val;
3318 for (i = 0; i < (full ? 4 : 2); i++)
3319 aarch64_set_vec_u32 (cpu, vd, i, val);
3320 return;
3321
3322 case 0xa: /* 16-bit, 8 bit shift. */
3323 val <<= 8;
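      /* Fall through. */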
3324 case 0x8: /* 16-bit, no shift. */
3325 val = ~ val;
3326 for (i = 0; i < (full ? 8 : 4); i++)
3327 aarch64_set_vec_u16 (cpu, vd, i, val);
3328 return;
3329
3330 case 0xd: /* 32-bit, mask shift by 16. */
3331 val <<= 8;
3332 val |= 0xFF;
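      /* Fall through. */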
3333 case 0xc: /* 32-bit, mask shift by 8. */
3334 val <<= 8;
3335 val |= 0xFF;
3336 val = ~ val;
3337 for (i = 0; i < (full ? 4 : 2); i++)
3338 aarch64_set_vec_u32 (cpu, vd, i, val);
3339 return;
3340
3341 case 0xE: /* MOVI Dn, #mask64 */
3342 {
3343 uint64_t mask = 0;
3344
3345 for (i = 0; i < 8; i++)
3346 if (val & (1 << i))
3347 mask |= (0xFFUL << (i * 8));
3348 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3349 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3350 return;
3351 }
3352
3353 case 0xf: /* FMOV Vd.2D, #fpimm. */
3354 {
3355 double u = fp_immediate_for_encoding_64 (val);
3356
3357 if (! full)
3358 HALT_UNALLOC;
3359
3360 aarch64_set_vec_double (cpu, vd, 0, u);
3361 aarch64_set_vec_double (cpu, vd, 1, u);
3362 return;
3363 }
3364
3365 default:
3366 HALT_NYI;
3367 }
3368 }
3369
3370 #define ABS(A) ((A) < 0 ? - (A) : (A))
3371
3372 static void
3373 do_vec_ABS (sim_cpu *cpu)
3374 {
3375 /* instr[31] = 0
3376 instr[30] = half(0)/full(1)
3377 instr[29,24] = 00 1110
3378 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3379 instr[21,10] = 10 0000 1011 10
3380 instr[9,5] = Vn
3381     instr[4,0] = Vd.  */
3382
3383 unsigned vn = INSTR (9, 5);
3384 unsigned vd = INSTR (4, 0);
3385 unsigned full = INSTR (30, 30);
3386 unsigned i;
3387
3388 NYI_assert (29, 24, 0x0E);
3389 NYI_assert (21, 10, 0x82E);
3390
3391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3392 switch (INSTR (23, 22))
3393 {
3394 case 0:
3395 for (i = 0; i < (full ? 16 : 8); i++)
3396 aarch64_set_vec_s8 (cpu, vd, i,
3397 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3398 break;
3399
3400 case 1:
3401 for (i = 0; i < (full ? 8 : 4); i++)
3402 aarch64_set_vec_s16 (cpu, vd, i,
3403 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3404 break;
3405
3406 case 2:
3407 for (i = 0; i < (full ? 4 : 2); i++)
3408 aarch64_set_vec_s32 (cpu, vd, i,
3409 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3410 break;
3411
3412 case 3:
3413 if (! full)
3414 HALT_NYI;
3415 for (i = 0; i < 2; i++)
3416 aarch64_set_vec_s64 (cpu, vd, i,
3417 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3418 break;
3419 }
3420 }
3421
3422 static void
3423 do_vec_ADDV (sim_cpu *cpu)
3424 {
3425 /* instr[31] = 0
3426 instr[30] = full/half selector
3427 instr[29,24] = 00 1110
3428 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3429 instr[21,10] = 11 0001 1011 10
3430 instr[9,5] = Vm
3431     instr[4,0] = Rd.  */
3432
3433 unsigned vm = INSTR (9, 5);
3434 unsigned rd = INSTR (4, 0);
3435 unsigned i;
3436 uint64_t val = 0;
3437 int full = INSTR (30, 30);
3438
3439 NYI_assert (29, 24, 0x0E);
3440 NYI_assert (21, 10, 0xC6E);
3441
3442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3443 switch (INSTR (23, 22))
3444 {
3445 case 0:
3446 for (i = 0; i < (full ? 16 : 8); i++)
3447 val += aarch64_get_vec_u8 (cpu, vm, i);
3448 aarch64_set_vec_u64 (cpu, rd, 0, val);
3449 return;
3450
3451 case 1:
3452 for (i = 0; i < (full ? 8 : 4); i++)
3453 val += aarch64_get_vec_u16 (cpu, vm, i);
3454 aarch64_set_vec_u64 (cpu, rd, 0, val);
3455 return;
3456
3457 case 2:
3458 if (! full)
3459 HALT_UNALLOC;
3460 for (i = 0; i < 4; i++)
3461 val += aarch64_get_vec_u32 (cpu, vm, i);
3462 aarch64_set_vec_u64 (cpu, rd, 0, val);
3463 return;
3464
3465 case 3:
3466 HALT_UNALLOC;
3467 }
3468 }
3469
3470 static void
3471 do_vec_ins_2 (sim_cpu *cpu)
3472 {
3473 /* instr[31,21] = 01001110000
3474 instr[20,18] = size & element selector
3475 instr[17,14] = 0000
3476 instr[13] = direction: to vec(0), from vec (1)
3477 instr[12,10] = 111
3478 instr[9,5] = Vm
3479 instr[4,0] = Vd. */
3480
3481 unsigned elem;
3482 unsigned vm = INSTR (9, 5);
3483 unsigned vd = INSTR (4, 0);
3484
3485 NYI_assert (31, 21, 0x270);
3486 NYI_assert (17, 14, 0);
3487 NYI_assert (12, 10, 7);
3488
3489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3490 if (INSTR (13, 13) == 1)
3491 {
3492 if (INSTR (18, 18) == 1)
3493 {
3494 /* 32-bit moves. */
3495 elem = INSTR (20, 19);
3496 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3497 aarch64_get_vec_u32 (cpu, vm, elem));
3498 }
3499 else
3500 {
3501 /* 64-bit moves. */
3502 if (INSTR (19, 19) != 1)
3503 HALT_NYI;
3504
3505 elem = INSTR (20, 20);
3506 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3507 aarch64_get_vec_u64 (cpu, vm, elem));
3508 }
3509 }
3510 else
3511 {
3512 if (INSTR (18, 18) == 1)
3513 {
3514 /* 32-bit moves. */
3515 elem = INSTR (20, 19);
3516 aarch64_set_vec_u32 (cpu, vd, elem,
3517 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3518 }
3519 else
3520 {
3521 /* 64-bit moves. */
3522 if (INSTR (19, 19) != 1)
3523 HALT_NYI;
3524
3525 elem = INSTR (20, 20);
3526 aarch64_set_vec_u64 (cpu, vd, elem,
3527 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3528 }
3529 }
3530 }
3531
3532 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3533 do \
3534 { \
3535 DST_TYPE a[N], b[N]; \
3536 \
3537 for (i = 0; i < (N); i++) \
3538 { \
3539 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3540 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3541 } \
3542 for (i = 0; i < (N); i++) \
3543 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3544 } \
3545 while (0)
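/* The macro reads both source vectors into locals before any write so
   that Vd may alias Vn or Vm.  For example, the invocation
   DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16) loads eight u8 lanes from
   each source (offset by the caller's bias for the upper-half "2"
   forms), multiplies them pairwise as uint16_t and stores eight u16
   lanes.  */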
3546
3547 static void
3548 do_vec_mull (sim_cpu *cpu)
3549 {
3550 /* instr[31] = 0
3551 instr[30] = lower(0)/upper(1) selector
3552 instr[29] = signed(0)/unsigned(1)
3553 instr[28,24] = 0 1110
3554 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3555 instr[21] = 1
3556 instr[20,16] = Vm
3557 instr[15,10] = 11 0000
3558 instr[9,5] = Vn
3559     instr[4,0] = Vd.  */
3560
3561 int unsign = INSTR (29, 29);
3562 int bias = INSTR (30, 30);
3563 unsigned vm = INSTR (20, 16);
3564 unsigned vn = INSTR ( 9, 5);
3565 unsigned vd = INSTR ( 4, 0);
3566 unsigned i;
3567
3568 NYI_assert (28, 24, 0x0E);
3569 NYI_assert (15, 10, 0x30);
3570
3571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3572 /* NB: Read source values before writing results, in case
3573 the source and destination vectors are the same. */
3574 switch (INSTR (23, 22))
3575 {
3576 case 0:
3577 if (bias)
3578 bias = 8;
3579 if (unsign)
3580 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3581 else
3582 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3583 return;
3584
3585 case 1:
3586 if (bias)
3587 bias = 4;
3588 if (unsign)
3589 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3590 else
3591 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3592 return;
3593
3594 case 2:
3595 if (bias)
3596 bias = 2;
3597 if (unsign)
3598 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3599 else
3600 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3601 return;
3602
3603 case 3:
3604 HALT_NYI;
3605 }
3606 }
3607
3608 static void
3609 do_vec_fadd (sim_cpu *cpu)
3610 {
3611 /* instr[31] = 0
3612 instr[30] = half(0)/full(1)
3613 instr[29,24] = 001110
3614 instr[23] = FADD(0)/FSUB(1)
3615 instr[22] = float (0)/double(1)
3616 instr[21] = 1
3617 instr[20,16] = Vm
3618 instr[15,10] = 110101
3619 instr[9,5] = Vn
3620     instr[4,0] = Vd.  */
3621
3622 unsigned vm = INSTR (20, 16);
3623 unsigned vn = INSTR (9, 5);
3624 unsigned vd = INSTR (4, 0);
3625 unsigned i;
3626 int full = INSTR (30, 30);
3627
3628 NYI_assert (29, 24, 0x0E);
3629 NYI_assert (21, 21, 1);
3630 NYI_assert (15, 10, 0x35);
3631
3632 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3633 if (INSTR (23, 23))
3634 {
3635 if (INSTR (22, 22))
3636 {
3637 if (! full)
3638 HALT_NYI;
3639
3640 for (i = 0; i < 2; i++)
3641 aarch64_set_vec_double (cpu, vd, i,
3642 aarch64_get_vec_double (cpu, vn, i)
3643 - aarch64_get_vec_double (cpu, vm, i));
3644 }
3645 else
3646 {
3647 for (i = 0; i < (full ? 4 : 2); i++)
3648 aarch64_set_vec_float (cpu, vd, i,
3649 aarch64_get_vec_float (cpu, vn, i)
3650 - aarch64_get_vec_float (cpu, vm, i));
3651 }
3652 }
3653 else
3654 {
3655 if (INSTR (22, 22))
3656 {
3657 if (! full)
3658 HALT_NYI;
3659
3660 for (i = 0; i < 2; i++)
3661 aarch64_set_vec_double (cpu, vd, i,
3662 aarch64_get_vec_double (cpu, vm, i)
3663 + aarch64_get_vec_double (cpu, vn, i));
3664 }
3665 else
3666 {
3667 for (i = 0; i < (full ? 4 : 2); i++)
3668 aarch64_set_vec_float (cpu, vd, i,
3669 aarch64_get_vec_float (cpu, vm, i)
3670 + aarch64_get_vec_float (cpu, vn, i));
3671 }
3672 }
3673 }
3674
3675 static void
3676 do_vec_add (sim_cpu *cpu)
3677 {
3678 /* instr[31] = 0
3679 instr[30] = full/half selector
3680 instr[29,24] = 001110
3681 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3682 instr[21] = 1
3683     instr[20,16] = Vm
3684     instr[15,10] = 100001
3685     instr[9,5] = Vn
3686     instr[4,0] = Vd.  */
3687
3688 unsigned vm = INSTR (20, 16);
3689 unsigned vn = INSTR (9, 5);
3690 unsigned vd = INSTR (4, 0);
3691 unsigned i;
3692 int full = INSTR (30, 30);
3693
3694 NYI_assert (29, 24, 0x0E);
3695 NYI_assert (21, 21, 1);
3696 NYI_assert (15, 10, 0x21);
3697
3698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3699 switch (INSTR (23, 22))
3700 {
3701 case 0:
3702 for (i = 0; i < (full ? 16 : 8); i++)
3703 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3704 + aarch64_get_vec_u8 (cpu, vm, i));
3705 return;
3706
3707 case 1:
3708 for (i = 0; i < (full ? 8 : 4); i++)
3709 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3710 + aarch64_get_vec_u16 (cpu, vm, i));
3711 return;
3712
3713 case 2:
3714 for (i = 0; i < (full ? 4 : 2); i++)
3715 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3716 + aarch64_get_vec_u32 (cpu, vm, i));
3717 return;
3718
3719 case 3:
3720 if (! full)
3721 HALT_UNALLOC;
3722 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3723 + aarch64_get_vec_u64 (cpu, vm, 0));
3724 aarch64_set_vec_u64 (cpu, vd, 1,
3725 aarch64_get_vec_u64 (cpu, vn, 1)
3726 + aarch64_get_vec_u64 (cpu, vm, 1));
3727 return;
3728 }
3729 }
3730
3731 static void
3732 do_vec_mul (sim_cpu *cpu)
3733 {
3734 /* instr[31] = 0
3735 instr[30] = full/half selector
3736 instr[29,24] = 00 1110
3737 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3738 instr[21] = 1
3739     instr[20,16] = Vm
3740     instr[15,10] = 10 0111
3741     instr[9,5] = Vn
3742     instr[4,0] = Vd.  */
3743
3744 unsigned vm = INSTR (20, 16);
3745 unsigned vn = INSTR (9, 5);
3746 unsigned vd = INSTR (4, 0);
3747 unsigned i;
3748 int full = INSTR (30, 30);
3749 int bias = 0;
3750
3751 NYI_assert (29, 24, 0x0E);
3752 NYI_assert (21, 21, 1);
3753 NYI_assert (15, 10, 0x27);
3754
3755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3756 switch (INSTR (23, 22))
3757 {
3758 case 0:
3759 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3760 return;
3761
3762 case 1:
3763 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3764 return;
3765
3766 case 2:
3767 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3768 return;
3769
3770 case 3:
3771 HALT_UNALLOC;
3772 }
3773 }
3774
3775 static void
3776 do_vec_MLA (sim_cpu *cpu)
3777 {
3778 /* instr[31] = 0
3779 instr[30] = full/half selector
3780 instr[29,24] = 00 1110
3781 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3782 instr[21] = 1
3783     instr[20,16] = Vm
3784     instr[15,10] = 1001 01
3785     instr[9,5] = Vn
3786     instr[4,0] = Vd.  */
3787
3788 unsigned vm = INSTR (20, 16);
3789 unsigned vn = INSTR (9, 5);
3790 unsigned vd = INSTR (4, 0);
3791 unsigned i;
3792 int full = INSTR (30, 30);
3793
3794 NYI_assert (29, 24, 0x0E);
3795 NYI_assert (21, 21, 1);
3796 NYI_assert (15, 10, 0x25);
3797
3798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3799 switch (INSTR (23, 22))
3800 {
3801 case 0:
3802 {
3803 	uint8_t a[16], b[16];
3804
3805 for (i = 0; i < (full ? 16 : 8); i++)
3806 {
3807 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3808 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3809 }
3810
3811 for (i = 0; i < (full ? 16 : 8); i++)
3812 {
3813 	    uint8_t v = aarch64_get_vec_u8 (cpu, vd, i);
3814
3815 	    aarch64_set_vec_u8 (cpu, vd, i, v + (a[i] * b[i]));
3816 }
3817 }
3818 return;
3819
3820 case 1:
3821 {
3822 	uint16_t a[8], b[8];
3823
3824 for (i = 0; i < (full ? 8 : 4); i++)
3825 {
3826 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3827 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3828 }
3829
3830 for (i = 0; i < (full ? 8 : 4); i++)
3831 {
3832 	    uint16_t v = aarch64_get_vec_u16 (cpu, vd, i);
3833
3834 	    aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3835 }
3836 }
3837 return;
3838
3839 case 2:
3840 {
3841 	uint32_t a[4], b[4];
3842
3843 for (i = 0; i < (full ? 4 : 2); i++)
3844 {
3845 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3846 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3847 }
3848
3849 for (i = 0; i < (full ? 4 : 2); i++)
3850 {
3851 	    uint32_t v = aarch64_get_vec_u32 (cpu, vd, i);
3852
3853 	    aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3854 }
3855 }
3856 return;
3857
3858 case 3:
3859 HALT_UNALLOC;
3860 }
3861 }
3862
3863 static float
3864 fmaxnm (float a, float b)
3865 {
3866 if (! isnan (a))
3867 {
3868 if (! isnan (b))
3869 return a > b ? a : b;
3870 return a;
3871 }
3872 else if (! isnan (b))
3873 return b;
3874 return a;
3875 }
3876
3877 static float
3878 fminnm (float a, float b)
3879 {
3880 if (! isnan (a))
3881 {
3882 if (! isnan (b))
3883 return a < b ? a : b;
3884 return a;
3885 }
3886 else if (! isnan (b))
3887 return b;
3888 return a;
3889 }
3890
3891 static double
3892 dmaxnm (double a, double b)
3893 {
3894 if (! isnan (a))
3895 {
3896 if (! isnan (b))
3897 return a > b ? a : b;
3898 return a;
3899 }
3900 else if (! isnan (b))
3901 return b;
3902 return a;
3903 }
3904
3905 static double
3906 dminnm (double a, double b)
3907 {
3908 if (! isnan (a))
3909 {
3910 if (! isnan (b))
3911 return a < b ? a : b;
3912 return a;
3913 }
3914 else if (! isnan (b))
3915 return b;
3916 return a;
3917 }
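/* These four helpers follow the IEEE 754-2008 minNum/maxNum rule used
   by FMINNM/FMAXNM: a NaN on one side is ignored, e.g.
   fmaxnm (NAN, 1.0f) == 1.0f, while NaNs on both sides propagate the
   first operand.  (Signalling NaNs and trapping are not modelled
   here.)  */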
3918
3919 static void
3920 do_vec_FminmaxNMP (sim_cpu *cpu)
3921 {
3922 /* instr [31] = 0
3923 instr [30] = half (0)/full (1)
3924 instr [29,24] = 10 1110
3925 instr [23] = max(0)/min(1)
3926 instr [22] = float (0)/double (1)
3927 instr [21] = 1
3928    instr [20,16] = Vm
3929    instr [15,10] = 1100 01
3930    instr [9,5]   = Vn
3931    instr [4,0]   = Vd.  */
3932
3933 unsigned vm = INSTR (20, 16);
3934 unsigned vn = INSTR (9, 5);
3935 unsigned vd = INSTR (4, 0);
3936 int full = INSTR (30, 30);
3937
3938 NYI_assert (29, 24, 0x2E);
3939 NYI_assert (21, 21, 1);
3940 NYI_assert (15, 10, 0x31);
3941
3942 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3943 if (INSTR (22, 22))
3944 {
3945 double (* fn)(double, double) = INSTR (23, 23)
3946 ? dminnm : dmaxnm;
3947
3948 if (! full)
3949 HALT_NYI;
3950 aarch64_set_vec_double (cpu, vd, 0,
3951 fn (aarch64_get_vec_double (cpu, vn, 0),
3952 aarch64_get_vec_double (cpu, vn, 1)));
3953       aarch64_set_vec_double (cpu, vd, 1,
3954 fn (aarch64_get_vec_double (cpu, vm, 0),
3955 aarch64_get_vec_double (cpu, vm, 1)));
3956 }
3957 else
3958 {
3959 float (* fn)(float, float) = INSTR (23, 23)
3960 ? fminnm : fmaxnm;
3961
3962 aarch64_set_vec_float (cpu, vd, 0,
3963 fn (aarch64_get_vec_float (cpu, vn, 0),
3964 aarch64_get_vec_float (cpu, vn, 1)));
3965 if (full)
3966 aarch64_set_vec_float (cpu, vd, 1,
3967 fn (aarch64_get_vec_float (cpu, vn, 2),
3968 aarch64_get_vec_float (cpu, vn, 3)));
3969
3970 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3971 fn (aarch64_get_vec_float (cpu, vm, 0),
3972 aarch64_get_vec_float (cpu, vm, 1)));
3973 if (full)
3974 aarch64_set_vec_float (cpu, vd, 3,
3975 fn (aarch64_get_vec_float (cpu, vm, 2),
3976 aarch64_get_vec_float (cpu, vm, 3)));
3977 }
3978 }
3979
3980 static void
3981 do_vec_AND (sim_cpu *cpu)
3982 {
3983 /* instr[31] = 0
3984 instr[30] = half (0)/full (1)
3985 instr[29,21] = 001110001
3986 instr[20,16] = Vm
3987 instr[15,10] = 000111
3988 instr[9,5] = Vn
3989      instr[4,0] = Vd. */
3990
3991 unsigned vm = INSTR (20, 16);
3992 unsigned vn = INSTR (9, 5);
3993 unsigned vd = INSTR (4, 0);
3994 unsigned i;
3995 int full = INSTR (30, 30);
3996
3997 NYI_assert (29, 21, 0x071);
3998 NYI_assert (15, 10, 0x07);
3999
4000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4001 for (i = 0; i < (full ? 4 : 2); i++)
4002 aarch64_set_vec_u32 (cpu, vd, i,
4003 aarch64_get_vec_u32 (cpu, vn, i)
4004 & aarch64_get_vec_u32 (cpu, vm, i));
4005 }
4006
4007 static void
4008 do_vec_BSL (sim_cpu *cpu)
4009 {
4010 /* instr[31] = 0
4011 instr[30] = half (0)/full (1)
4012 instr[29,21] = 101110011
4013 instr[20,16] = Vm
4014 instr[15,10] = 000111
4015 instr[9,5] = Vn
4016      instr[4,0] = Vd. */
4017
4018 unsigned vm = INSTR (20, 16);
4019 unsigned vn = INSTR (9, 5);
4020 unsigned vd = INSTR (4, 0);
4021 unsigned i;
4022 int full = INSTR (30, 30);
4023
4024 NYI_assert (29, 21, 0x173);
4025 NYI_assert (15, 10, 0x07);
4026
4027 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
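  /* Bitwise select: each result bit is taken from Vn where the
     corresponding bit of Vd (the selector operand) is set, and from
     Vm where it is clear.  */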
4028 for (i = 0; i < (full ? 16 : 8); i++)
4029 aarch64_set_vec_u8 (cpu, vd, i,
4030 ( aarch64_get_vec_u8 (cpu, vd, i)
4031 & aarch64_get_vec_u8 (cpu, vn, i))
4032 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4033 & aarch64_get_vec_u8 (cpu, vm, i)));
4034 }
4035
4036 static void
4037 do_vec_EOR (sim_cpu *cpu)
4038 {
4039 /* instr[31] = 0
4040 instr[30] = half (0)/full (1)
4041 instr[29,21] = 10 1110 001
4042 instr[20,16] = Vm
4043 instr[15,10] = 000111
4044 instr[9,5] = Vn
4045      instr[4,0] = Vd. */
4046
4047 unsigned vm = INSTR (20, 16);
4048 unsigned vn = INSTR (9, 5);
4049 unsigned vd = INSTR (4, 0);
4050 unsigned i;
4051 int full = INSTR (30, 30);
4052
4053 NYI_assert (29, 21, 0x171);
4054 NYI_assert (15, 10, 0x07);
4055
4056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4057 for (i = 0; i < (full ? 4 : 2); i++)
4058 aarch64_set_vec_u32 (cpu, vd, i,
4059 aarch64_get_vec_u32 (cpu, vn, i)
4060 ^ aarch64_get_vec_u32 (cpu, vm, i));
4061 }
4062
4063 static void
4064 do_vec_bit (sim_cpu *cpu)
4065 {
4066 /* instr[31] = 0
4067 instr[30] = half (0)/full (1)
4068 instr[29,23] = 10 1110 1
4069 instr[22] = BIT (0) / BIF (1)
4070 instr[21] = 1
4071 instr[20,16] = Vm
4072 instr[15,10] = 0001 11
4073 instr[9,5] = Vn
4074      instr[4,0] = Vd. */
4075
4076 unsigned vm = INSTR (20, 16);
4077 unsigned vn = INSTR (9, 5);
4078 unsigned vd = INSTR (4, 0);
4079 unsigned full = INSTR (30, 30);
4080 unsigned test_false = INSTR (22, 22);
4081 unsigned i;
4082
4083 NYI_assert (29, 23, 0x5D);
4084 NYI_assert (21, 21, 1);
4085 NYI_assert (15, 10, 0x07);
4086
4087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4088   for (i = 0; i < (full ? 2 : 1); i++)
4089     {
4090       uint64_t vd_val = aarch64_get_vec_u64 (cpu, vd, i);
4091       uint64_t vn_val = aarch64_get_vec_u64 (cpu, vn, i);
4092       uint64_t vm_val = aarch64_get_vec_u64 (cpu, vm, i);
4093       /* BIT inserts the Vn bits where the Vm bit is set, BIF inserts
4094 	 them where the Vm bit is clear.  */
4095       uint64_t sel = test_false ? ~vm_val : vm_val;
4096
4097       aarch64_set_vec_u64 (cpu, vd, i,
4098 			   (vd_val & ~sel) | (vn_val & sel));
4099     }
4100 }
4101
4102 static void
4103 do_vec_ORN (sim_cpu *cpu)
4104 {
4105 /* instr[31] = 0
4106 instr[30] = half (0)/full (1)
4107 instr[29,21] = 00 1110 111
4108 instr[20,16] = Vm
4109 instr[15,10] = 00 0111
4110 instr[9,5] = Vn
4111      instr[4,0] = Vd. */
4112
4113 unsigned vm = INSTR (20, 16);
4114 unsigned vn = INSTR (9, 5);
4115 unsigned vd = INSTR (4, 0);
4116 unsigned i;
4117 int full = INSTR (30, 30);
4118
4119 NYI_assert (29, 21, 0x077);
4120 NYI_assert (15, 10, 0x07);
4121
4122 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4123 for (i = 0; i < (full ? 16 : 8); i++)
4124 aarch64_set_vec_u8 (cpu, vd, i,
4125 aarch64_get_vec_u8 (cpu, vn, i)
4126 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4127 }
4128
4129 static void
4130 do_vec_ORR (sim_cpu *cpu)
4131 {
4132 /* instr[31] = 0
4133 instr[30] = half (0)/full (1)
4134 instr[29,21] = 00 1110 101
4135 instr[20,16] = Vm
4136 instr[15,10] = 0001 11
4137 instr[9,5] = Vn
4138      instr[4,0] = Vd. */
4139
4140 unsigned vm = INSTR (20, 16);
4141 unsigned vn = INSTR (9, 5);
4142 unsigned vd = INSTR (4, 0);
4143 unsigned i;
4144 int full = INSTR (30, 30);
4145
4146 NYI_assert (29, 21, 0x075);
4147 NYI_assert (15, 10, 0x07);
4148
4149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4150 for (i = 0; i < (full ? 16 : 8); i++)
4151 aarch64_set_vec_u8 (cpu, vd, i,
4152 aarch64_get_vec_u8 (cpu, vn, i)
4153 | aarch64_get_vec_u8 (cpu, vm, i));
4154 }
4155
4156 static void
4157 do_vec_BIC (sim_cpu *cpu)
4158 {
4159 /* instr[31] = 0
4160 instr[30] = half (0)/full (1)
4161 instr[29,21] = 00 1110 011
4162 instr[20,16] = Vm
4163 instr[15,10] = 00 0111
4164 instr[9,5] = Vn
4165      instr[4,0] = Vd. */
4166
4167 unsigned vm = INSTR (20, 16);
4168 unsigned vn = INSTR (9, 5);
4169 unsigned vd = INSTR (4, 0);
4170 unsigned i;
4171 int full = INSTR (30, 30);
4172
4173 NYI_assert (29, 21, 0x073);
4174 NYI_assert (15, 10, 0x07);
4175
4176 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4177 for (i = 0; i < (full ? 16 : 8); i++)
4178 aarch64_set_vec_u8 (cpu, vd, i,
4179 aarch64_get_vec_u8 (cpu, vn, i)
4180 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4181 }
4182
4183 static void
4184 do_vec_XTN (sim_cpu *cpu)
4185 {
4186 /* instr[31] = 0
4187 instr[30] = first part (0)/ second part (1)
4188 instr[29,24] = 00 1110
4189 instr[23,22] = size: byte(00), half(01), word (10)
4190 instr[21,10] = 1000 0100 1010
4191 instr[9,5] = Vs
4192 instr[4,0] = Vd. */
4193
4194 unsigned vs = INSTR (9, 5);
4195 unsigned vd = INSTR (4, 0);
4196 unsigned bias = INSTR (30, 30);
4197 unsigned i;
4198
4199 NYI_assert (29, 24, 0x0E);
4200 NYI_assert (21, 10, 0x84A);
4201
4202 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4203 switch (INSTR (23, 22))
4204 {
4205 case 0:
4206 for (i = 0; i < 8; i++)
4207 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4208 aarch64_get_vec_u16 (cpu, vs, i));
4209 return;
4210
4211 case 1:
4212 for (i = 0; i < 4; i++)
4213 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4214 aarch64_get_vec_u32 (cpu, vs, i));
4215 return;
4216
4217 case 2:
4218 for (i = 0; i < 2; i++)
4219 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4220 aarch64_get_vec_u64 (cpu, vs, i));
4221 return;
4222 }
4223 }
4224
4225 static void
4226 do_vec_maxv (sim_cpu *cpu)
4227 {
4228 /* instr[31] = 0
4229 instr[30] = half(0)/full(1)
4230 instr[29] = signed (0)/unsigned(1)
4231 instr[28,24] = 0 1110
4232 instr[23,22] = size: byte(00), half(01), word (10)
4233 instr[21] = 1
4234 instr[20,17] = 1 000
4235 instr[16] = max(0)/min(1)
4236 instr[15,10] = 1010 10
4237 instr[9,5] = V source
4238      instr[4,0] = R dest. */
4239
4240 unsigned vs = INSTR (9, 5);
4241 unsigned rd = INSTR (4, 0);
4242 unsigned full = INSTR (30, 30);
4243 unsigned i;
4244
4245 NYI_assert (28, 24, 0x0E);
4246 NYI_assert (21, 21, 1);
4247 NYI_assert (20, 17, 8);
4248 NYI_assert (15, 10, 0x2A);
4249
4250 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
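  /* Bit 29 selects signed (0) or unsigned (1) and bit 16 selects max
     (0) or min (1), giving the four reduction variants below.  */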
4251 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4252 {
4253 case 0: /* SMAXV. */
4254 {
4255 int64_t smax;
4256 switch (INSTR (23, 22))
4257 {
4258 case 0:
4259 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4260 for (i = 1; i < (full ? 16 : 8); i++)
4261 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4262 break;
4263 case 1:
4264 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4265 for (i = 1; i < (full ? 8 : 4); i++)
4266 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4267 break;
4268 case 2:
4269 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4270 for (i = 1; i < (full ? 4 : 2); i++)
4271 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4272 break;
4273 case 3:
4274 HALT_UNALLOC;
4275 }
4276 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4277 return;
4278 }
4279
4280 case 1: /* SMINV. */
4281 {
4282 int64_t smin;
4283 switch (INSTR (23, 22))
4284 {
4285 case 0:
4286 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4287 for (i = 1; i < (full ? 16 : 8); i++)
4288 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4289 break;
4290 case 1:
4291 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4292 for (i = 1; i < (full ? 8 : 4); i++)
4293 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4294 break;
4295 case 2:
4296 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4297 for (i = 1; i < (full ? 4 : 2); i++)
4298 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4299 break;
4300
4301 case 3:
4302 HALT_UNALLOC;
4303 }
4304 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4305 return;
4306 }
4307
4308 case 2: /* UMAXV. */
4309 {
4310 uint64_t umax;
4311 switch (INSTR (23, 22))
4312 {
4313 case 0:
4314 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4315 for (i = 1; i < (full ? 16 : 8); i++)
4316 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4317 break;
4318 case 1:
4319 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4320 for (i = 1; i < (full ? 8 : 4); i++)
4321 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4322 break;
4323 case 2:
4324 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4325 for (i = 1; i < (full ? 4 : 2); i++)
4326 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4327 break;
4328
4329 case 3:
4330 HALT_UNALLOC;
4331 }
4332 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4333 return;
4334 }
4335
4336 case 3: /* UMINV. */
4337 {
4338 uint64_t umin;
4339 switch (INSTR (23, 22))
4340 {
4341 case 0:
4342 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4343 for (i = 1; i < (full ? 16 : 8); i++)
4344 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4345 break;
4346 case 1:
4347 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4348 for (i = 1; i < (full ? 8 : 4); i++)
4349 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4350 break;
4351 case 2:
4352 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4353 for (i = 1; i < (full ? 4 : 2); i++)
4354 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4355 break;
4356
4357 case 3:
4358 HALT_UNALLOC;
4359 }
4360 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4361 return;
4362 }
4363 }
4364 }
4365
4366 static void
4367 do_vec_fminmaxV (sim_cpu *cpu)
4368 {
4369 /* instr[31,24] = 0110 1110
4370 instr[23] = max(0)/min(1)
4371 instr[22,14] = 011 0000 11
4372 instr[13,12] = nm(00)/normal(11)
4373 instr[11,10] = 10
4374 instr[9,5] = V source
4375      instr[4,0] = R dest. */
4376
4377 unsigned vs = INSTR (9, 5);
4378 unsigned rd = INSTR (4, 0);
4379 unsigned i;
4380 float res = aarch64_get_vec_float (cpu, vs, 0);
4381
4382 NYI_assert (31, 24, 0x6E);
4383 NYI_assert (22, 14, 0x0C3);
4384 NYI_assert (11, 10, 2);
4385
4386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4387 if (INSTR (23, 23))
4388 {
4389 switch (INSTR (13, 12))
4390 {
4391 	case 0: /* FMINNMV. */
4392 for (i = 1; i < 4; i++)
4393 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4394 break;
4395
4396 case 3: /* FMINV. */
4397 for (i = 1; i < 4; i++)
4398 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4399 break;
4400
4401 default:
4402 HALT_NYI;
4403 }
4404 }
4405 else
4406 {
4407 switch (INSTR (13, 12))
4408 {
4409 	case 0: /* FMAXNMV. */
4410 for (i = 1; i < 4; i++)
4411 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4412 break;
4413
4414 case 3: /* FMAXV. */
4415 for (i = 1; i < 4; i++)
4416 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4417 break;
4418
4419 default:
4420 HALT_NYI;
4421 }
4422 }
4423
4424 aarch64_set_FP_float (cpu, rd, res);
4425 }
4426
4427 static void
4428 do_vec_Fminmax (sim_cpu *cpu)
4429 {
4430 /* instr[31] = 0
4431 instr[30] = half(0)/full(1)
4432 instr[29,24] = 00 1110
4433 instr[23] = max(0)/min(1)
4434 instr[22] = float(0)/double(1)
4435 instr[21] = 1
4436 instr[20,16] = Vm
4437 instr[15,14] = 11
4438 instr[13,12] = nm(00)/normal(11)
4439 instr[11,10] = 01
4440 instr[9,5] = Vn
4441 instr[4,0] = Vd. */
4442
4443 unsigned vm = INSTR (20, 16);
4444 unsigned vn = INSTR (9, 5);
4445 unsigned vd = INSTR (4, 0);
4446 unsigned full = INSTR (30, 30);
4447 unsigned min = INSTR (23, 23);
4448 unsigned i;
4449
4450 NYI_assert (29, 24, 0x0E);
4451 NYI_assert (21, 21, 1);
4452 NYI_assert (15, 14, 3);
4453 NYI_assert (11, 10, 1);
4454
4455 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4456 if (INSTR (22, 22))
4457 {
4458 double (* func)(double, double);
4459
4460 if (! full)
4461 HALT_NYI;
4462
4463 if (INSTR (13, 12) == 0)
4464 func = min ? dminnm : dmaxnm;
4465 else if (INSTR (13, 12) == 3)
4466 func = min ? fmin : fmax;
4467 else
4468 HALT_NYI;
4469
4470 for (i = 0; i < 2; i++)
4471 aarch64_set_vec_double (cpu, vd, i,
4472 func (aarch64_get_vec_double (cpu, vn, i),
4473 aarch64_get_vec_double (cpu, vm, i)));
4474 }
4475 else
4476 {
4477 float (* func)(float, float);
4478
4479 if (INSTR (13, 12) == 0)
4480 func = min ? fminnm : fmaxnm;
4481 else if (INSTR (13, 12) == 3)
4482 func = min ? fminf : fmaxf;
4483 else
4484 HALT_NYI;
4485
4486 for (i = 0; i < (full ? 4 : 2); i++)
4487 aarch64_set_vec_float (cpu, vd, i,
4488 func (aarch64_get_vec_float (cpu, vn, i),
4489 aarch64_get_vec_float (cpu, vm, i)));
4490 }
4491 }
4492
4493 static void
4494 do_vec_SCVTF (sim_cpu *cpu)
4495 {
4496 /* instr[31] = 0
4497 instr[30] = Q
4498 instr[29,23] = 00 1110 0
4499 instr[22] = float(0)/double(1)
4500 instr[21,10] = 10 0001 1101 10
4501 instr[9,5] = Vn
4502 instr[4,0] = Vd. */
4503
4504 unsigned vn = INSTR (9, 5);
4505 unsigned vd = INSTR (4, 0);
4506 unsigned full = INSTR (30, 30);
4507 unsigned size = INSTR (22, 22);
4508 unsigned i;
4509
4510 NYI_assert (29, 23, 0x1C);
4511 NYI_assert (21, 10, 0x876);
4512
4513 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4514 if (size)
4515 {
4516 if (! full)
4517 HALT_UNALLOC;
4518
4519 for (i = 0; i < 2; i++)
4520 {
4521 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4522 aarch64_set_vec_double (cpu, vd, i, val);
4523 }
4524 }
4525 else
4526 {
4527 for (i = 0; i < (full ? 4 : 2); i++)
4528 {
4529 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4530 aarch64_set_vec_float (cpu, vd, i, val);
4531 }
4532 }
4533 }
4534
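/* Helper macros for the vector compare instructions: each destination
   element is set to all ones when the comparison is true and to all
   zeros when it is false.  */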
4535 #define VEC_CMP(SOURCE, CMP) \
4536 do \
4537 { \
4538 switch (size) \
4539 { \
4540 case 0: \
4541 for (i = 0; i < (full ? 16 : 8); i++) \
4542 aarch64_set_vec_u8 (cpu, vd, i, \
4543 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4544 CMP \
4545 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4546 ? -1 : 0); \
4547 return; \
4548 case 1: \
4549 for (i = 0; i < (full ? 8 : 4); i++) \
4550 aarch64_set_vec_u16 (cpu, vd, i, \
4551 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4552 CMP \
4553 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4554 ? -1 : 0); \
4555 return; \
4556 case 2: \
4557 for (i = 0; i < (full ? 4 : 2); i++) \
4558 aarch64_set_vec_u32 (cpu, vd, i, \
4559 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4560 CMP \
4561 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4562 ? -1 : 0); \
4563 return; \
4564 case 3: \
4565 if (! full) \
4566 HALT_UNALLOC; \
4567 for (i = 0; i < 2; i++) \
4568 aarch64_set_vec_u64 (cpu, vd, i, \
4569 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4570 CMP \
4571 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4572 ? -1ULL : 0); \
4573 return; \
4574 } \
4575 } \
4576 while (0)
4577
4578 #define VEC_CMP0(SOURCE, CMP) \
4579 do \
4580 { \
4581 switch (size) \
4582 { \
4583 case 0: \
4584 for (i = 0; i < (full ? 16 : 8); i++) \
4585 aarch64_set_vec_u8 (cpu, vd, i, \
4586 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4587 CMP 0 ? -1 : 0); \
4588 return; \
4589 case 1: \
4590 for (i = 0; i < (full ? 8 : 4); i++) \
4591 aarch64_set_vec_u16 (cpu, vd, i, \
4592 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4593 CMP 0 ? -1 : 0); \
4594 return; \
4595 case 2: \
4596 for (i = 0; i < (full ? 4 : 2); i++) \
4597 aarch64_set_vec_u32 (cpu, vd, i, \
4598 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4599 CMP 0 ? -1 : 0); \
4600 return; \
4601 case 3: \
4602 if (! full) \
4603 HALT_UNALLOC; \
4604 for (i = 0; i < 2; i++) \
4605 aarch64_set_vec_u64 (cpu, vd, i, \
4606 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4607 CMP 0 ? -1ULL : 0); \
4608 return; \
4609 } \
4610 } \
4611 while (0)
4612
4613 #define VEC_FCMP0(CMP) \
4614 do \
4615 { \
4616 if (vm != 0) \
4617 HALT_NYI; \
4618 if (INSTR (22, 22)) \
4619 { \
4620 if (! full) \
4621 HALT_NYI; \
4622 for (i = 0; i < 2; i++) \
4623 aarch64_set_vec_u64 (cpu, vd, i, \
4624 aarch64_get_vec_double (cpu, vn, i) \
4625 CMP 0.0 ? -1 : 0); \
4626 } \
4627 else \
4628 { \
4629 for (i = 0; i < (full ? 4 : 2); i++) \
4630 aarch64_set_vec_u32 (cpu, vd, i, \
4631 aarch64_get_vec_float (cpu, vn, i) \
4632 CMP 0.0 ? -1 : 0); \
4633 } \
4634 return; \
4635 } \
4636 while (0)
4637
4638 #define VEC_FCMP(CMP) \
4639 do \
4640 { \
4641 if (INSTR (22, 22)) \
4642 { \
4643 if (! full) \
4644 HALT_NYI; \
4645 for (i = 0; i < 2; i++) \
4646 aarch64_set_vec_u64 (cpu, vd, i, \
4647 aarch64_get_vec_double (cpu, vn, i) \
4648 CMP \
4649 aarch64_get_vec_double (cpu, vm, i) \
4650 ? -1 : 0); \
4651 } \
4652 else \
4653 { \
4654 for (i = 0; i < (full ? 4 : 2); i++) \
4655 aarch64_set_vec_u32 (cpu, vd, i, \
4656 aarch64_get_vec_float (cpu, vn, i) \
4657 CMP \
4658 aarch64_get_vec_float (cpu, vm, i) \
4659 ? -1 : 0); \
4660 } \
4661 return; \
4662 } \
4663 while (0)
4664
4665 static void
4666 do_vec_compare (sim_cpu *cpu)
4667 {
4668 /* instr[31] = 0
4669 instr[30] = half(0)/full(1)
4670 instr[29] = part-of-comparison-type
4671 instr[28,24] = 0 1110
4672 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4673                     bit 22 gives the type of float compares: single (0) / double (1)
4674 instr[21] = 1
4675 instr[20,16] = Vm or 00000 (compare vs 0)
4676 instr[15,10] = part-of-comparison-type
4677 instr[9,5] = Vn
4678      instr[4,0] = Vd. */
4679
4680 int full = INSTR (30, 30);
4681 int size = INSTR (23, 22);
4682 unsigned vm = INSTR (20, 16);
4683 unsigned vn = INSTR (9, 5);
4684 unsigned vd = INSTR (4, 0);
4685 unsigned i;
4686
4687 NYI_assert (28, 24, 0x0E);
4688 NYI_assert (21, 21, 1);
4689
4690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4691 if ((INSTR (11, 11)
4692 && INSTR (14, 14))
4693 || ((INSTR (11, 11) == 0
4694 && INSTR (10, 10) == 0)))
4695 {
4696 /* A compare vs 0. */
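      /* Except that when Vm is non-zero this is not a compare at all:
	 several other instructions overlap this decode space, so
	 redirect to their handlers.  */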
4697 if (vm != 0)
4698 {
4699 if (INSTR (15, 10) == 0x2A)
4700 do_vec_maxv (cpu);
4701 else if (INSTR (15, 10) == 0x32
4702 || INSTR (15, 10) == 0x3E)
4703 do_vec_fminmaxV (cpu);
4704 else if (INSTR (29, 23) == 0x1C
4705 && INSTR (21, 10) == 0x876)
4706 do_vec_SCVTF (cpu);
4707 else
4708 HALT_NYI;
4709 return;
4710 }
4711 }
4712
4713 if (INSTR (14, 14))
4714 {
4715 /* A floating point compare. */
4716 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4717 | INSTR (13, 10);
4718
4719 NYI_assert (15, 15, 1);
4720
4721 switch (decode)
4722 {
4723 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4724 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4725 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4726 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4727 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4728 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4729 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4730 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4731
4732 default:
4733 HALT_NYI;
4734 }
4735 }
4736 else
4737 {
4738 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4739
4740 switch (decode)
4741 {
4742 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4743 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4744 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4745 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4746 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4747 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4748 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4749 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4750 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4751 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4752 default:
4753 if (vm == 0)
4754 HALT_NYI;
4755 do_vec_maxv (cpu);
4756 }
4757 }
4758 }
4759
4760 static void
4761 do_vec_SSHL (sim_cpu *cpu)
4762 {
4763 /* instr[31] = 0
4764 instr[30] = first part (0)/ second part (1)
4765 instr[29,24] = 00 1110
4766 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4767 instr[21] = 1
4768 instr[20,16] = Vm
4769 instr[15,10] = 0100 01
4770 instr[9,5] = Vn
4771 instr[4,0] = Vd. */
4772
4773 unsigned full = INSTR (30, 30);
4774 unsigned vm = INSTR (20, 16);
4775 unsigned vn = INSTR (9, 5);
4776 unsigned vd = INSTR (4, 0);
4777 unsigned i;
4778 signed int shift;
4779
4780 NYI_assert (29, 24, 0x0E);
4781 NYI_assert (21, 21, 1);
4782 NYI_assert (15, 10, 0x11);
4783
4784   /* FIXME: What is a signed shift left in this context?  */
4785
4786 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4787 switch (INSTR (23, 22))
4788 {
4789 case 0:
4790 for (i = 0; i < (full ? 16 : 8); i++)
4791 {
4792 shift = aarch64_get_vec_s8 (cpu, vm, i);
4793 if (shift >= 0)
4794 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4795 << shift);
4796 else
4797 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4798 >> - shift);
4799 }
4800 return;
4801
4802 case 1:
4803 for (i = 0; i < (full ? 8 : 4); i++)
4804 {
4805 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4806 if (shift >= 0)
4807 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4808 << shift);
4809 else
4810 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4811 >> - shift);
4812 }
4813 return;
4814
4815 case 2:
4816 for (i = 0; i < (full ? 4 : 2); i++)
4817 {
4818 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4819 if (shift >= 0)
4820 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4821 << shift);
4822 else
4823 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4824 >> - shift);
4825 }
4826 return;
4827
4828 case 3:
4829 if (! full)
4830 HALT_UNALLOC;
4831 for (i = 0; i < 2; i++)
4832 {
4833 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4834 if (shift >= 0)
4835 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4836 << shift);
4837 else
4838 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4839 >> - shift);
4840 }
4841 return;
4842 }
4843 }
4844
4845 static void
4846 do_vec_USHL (sim_cpu *cpu)
4847 {
4848 /* instr[31] = 0
4849 instr[30] = first part (0)/ second part (1)
4850 instr[29,24] = 10 1110
4851 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4852 instr[21] = 1
4853 instr[20,16] = Vm
4854 instr[15,10] = 0100 01
4855 instr[9,5] = Vn
4856 instr[4,0] = Vd */
4857
4858 unsigned full = INSTR (30, 30);
4859 unsigned vm = INSTR (20, 16);
4860 unsigned vn = INSTR (9, 5);
4861 unsigned vd = INSTR (4, 0);
4862 unsigned i;
4863 signed int shift;
4864
4865 NYI_assert (29, 24, 0x2E);
4866 NYI_assert (15, 10, 0x11);
4867
4868 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4869 switch (INSTR (23, 22))
4870 {
4871 case 0:
4872 for (i = 0; i < (full ? 16 : 8); i++)
4873 {
4874 shift = aarch64_get_vec_s8 (cpu, vm, i);
4875 if (shift >= 0)
4876 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4877 << shift);
4878 else
4879 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4880 >> - shift);
4881 }
4882 return;
4883
4884 case 1:
4885 for (i = 0; i < (full ? 8 : 4); i++)
4886 {
4887 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4888 if (shift >= 0)
4889 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4890 << shift);
4891 else
4892 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4893 >> - shift);
4894 }
4895 return;
4896
4897 case 2:
4898 for (i = 0; i < (full ? 4 : 2); i++)
4899 {
4900 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4901 if (shift >= 0)
4902 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4903 << shift);
4904 else
4905 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4906 >> - shift);
4907 }
4908 return;
4909
4910 case 3:
4911 if (! full)
4912 HALT_UNALLOC;
4913 for (i = 0; i < 2; i++)
4914 {
4915 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4916 if (shift >= 0)
4917 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4918 << shift);
4919 else
4920 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4921 >> - shift);
4922 }
4923 return;
4924 }
4925 }
4926
4927 static void
4928 do_vec_FMLA (sim_cpu *cpu)
4929 {
4930 /* instr[31] = 0
4931 instr[30] = full/half selector
4932 instr[29,23] = 0011100
4933 instr[22] = size: 0=>float, 1=>double
4934 instr[21] = 1
4935      instr[20,16] = Vm
4936      instr[15,10] = 1100 11
4937      instr[9,5] = Vn
4938      instr[4,0] = Vd.  */
4939
4940 unsigned vm = INSTR (20, 16);
4941 unsigned vn = INSTR (9, 5);
4942 unsigned vd = INSTR (4, 0);
4943 unsigned i;
4944 int full = INSTR (30, 30);
4945
4946 NYI_assert (29, 23, 0x1C);
4947 NYI_assert (21, 21, 1);
4948 NYI_assert (15, 10, 0x33);
4949
4950 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4951 if (INSTR (22, 22))
4952 {
4953 if (! full)
4954 HALT_UNALLOC;
4955 for (i = 0; i < 2; i++)
4956 aarch64_set_vec_double (cpu, vd, i,
4957 aarch64_get_vec_double (cpu, vn, i) *
4958 aarch64_get_vec_double (cpu, vm, i) +
4959 aarch64_get_vec_double (cpu, vd, i));
4960 }
4961 else
4962 {
4963 for (i = 0; i < (full ? 4 : 2); i++)
4964 aarch64_set_vec_float (cpu, vd, i,
4965 aarch64_get_vec_float (cpu, vn, i) *
4966 aarch64_get_vec_float (cpu, vm, i) +
4967 aarch64_get_vec_float (cpu, vd, i));
4968 }
4969 }
4970
4971 static void
4972 do_vec_max (sim_cpu *cpu)
4973 {
4974 /* instr[31] = 0
4975 instr[30] = full/half selector
4976 instr[29] = SMAX (0) / UMAX (1)
4977 instr[28,24] = 0 1110
4978 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4979 instr[21] = 1
4980      instr[20,16] = Vm
4981      instr[15,10] = 0110 01
4982      instr[9,5] = Vn
4983      instr[4,0] = Vd.  */
4984
4985 unsigned vm = INSTR (20, 16);
4986 unsigned vn = INSTR (9, 5);
4987 unsigned vd = INSTR (4, 0);
4988 unsigned i;
4989 int full = INSTR (30, 30);
4990
4991 NYI_assert (28, 24, 0x0E);
4992 NYI_assert (21, 21, 1);
4993 NYI_assert (15, 10, 0x19);
4994
4995 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4996 if (INSTR (29, 29))
4997 {
4998 switch (INSTR (23, 22))
4999 {
5000 case 0:
5001 for (i = 0; i < (full ? 16 : 8); i++)
5002 aarch64_set_vec_u8 (cpu, vd, i,
5003 aarch64_get_vec_u8 (cpu, vn, i)
5004 > aarch64_get_vec_u8 (cpu, vm, i)
5005 ? aarch64_get_vec_u8 (cpu, vn, i)
5006 : aarch64_get_vec_u8 (cpu, vm, i));
5007 return;
5008
5009 case 1:
5010 for (i = 0; i < (full ? 8 : 4); i++)
5011 aarch64_set_vec_u16 (cpu, vd, i,
5012 aarch64_get_vec_u16 (cpu, vn, i)
5013 > aarch64_get_vec_u16 (cpu, vm, i)
5014 ? aarch64_get_vec_u16 (cpu, vn, i)
5015 : aarch64_get_vec_u16 (cpu, vm, i));
5016 return;
5017
5018 case 2:
5019 for (i = 0; i < (full ? 4 : 2); i++)
5020 aarch64_set_vec_u32 (cpu, vd, i,
5021 aarch64_get_vec_u32 (cpu, vn, i)
5022 > aarch64_get_vec_u32 (cpu, vm, i)
5023 ? aarch64_get_vec_u32 (cpu, vn, i)
5024 : aarch64_get_vec_u32 (cpu, vm, i));
5025 return;
5026
5027 case 3:
5028 HALT_UNALLOC;
5029 }
5030 }
5031 else
5032 {
5033 switch (INSTR (23, 22))
5034 {
5035 case 0:
5036 for (i = 0; i < (full ? 16 : 8); i++)
5037 aarch64_set_vec_s8 (cpu, vd, i,
5038 aarch64_get_vec_s8 (cpu, vn, i)
5039 > aarch64_get_vec_s8 (cpu, vm, i)
5040 ? aarch64_get_vec_s8 (cpu, vn, i)
5041 : aarch64_get_vec_s8 (cpu, vm, i));
5042 return;
5043
5044 case 1:
5045 for (i = 0; i < (full ? 8 : 4); i++)
5046 aarch64_set_vec_s16 (cpu, vd, i,
5047 aarch64_get_vec_s16 (cpu, vn, i)
5048 > aarch64_get_vec_s16 (cpu, vm, i)
5049 ? aarch64_get_vec_s16 (cpu, vn, i)
5050 : aarch64_get_vec_s16 (cpu, vm, i));
5051 return;
5052
5053 case 2:
5054 for (i = 0; i < (full ? 4 : 2); i++)
5055 aarch64_set_vec_s32 (cpu, vd, i,
5056 aarch64_get_vec_s32 (cpu, vn, i)
5057 > aarch64_get_vec_s32 (cpu, vm, i)
5058 ? aarch64_get_vec_s32 (cpu, vn, i)
5059 : aarch64_get_vec_s32 (cpu, vm, i));
5060 return;
5061
5062 case 3:
5063 HALT_UNALLOC;
5064 }
5065 }
5066 }
5067
5068 static void
5069 do_vec_min (sim_cpu *cpu)
5070 {
5071 /* instr[31] = 0
5072 instr[30] = full/half selector
5073 instr[29] = SMIN (0) / UMIN (1)
5074 instr[28,24] = 0 1110
5075 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5076 instr[21] = 1
5077      instr[20,16] = Vm
5078      instr[15,10] = 0110 11
5079      instr[9,5] = Vn
5080      instr[4,0] = Vd.  */
5081
5082 unsigned vm = INSTR (20, 16);
5083 unsigned vn = INSTR (9, 5);
5084 unsigned vd = INSTR (4, 0);
5085 unsigned i;
5086 int full = INSTR (30, 30);
5087
5088 NYI_assert (28, 24, 0x0E);
5089 NYI_assert (21, 21, 1);
5090 NYI_assert (15, 10, 0x1B);
5091
5092 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5093 if (INSTR (29, 29))
5094 {
5095 switch (INSTR (23, 22))
5096 {
5097 case 0:
5098 for (i = 0; i < (full ? 16 : 8); i++)
5099 aarch64_set_vec_u8 (cpu, vd, i,
5100 aarch64_get_vec_u8 (cpu, vn, i)
5101 < aarch64_get_vec_u8 (cpu, vm, i)
5102 ? aarch64_get_vec_u8 (cpu, vn, i)
5103 : aarch64_get_vec_u8 (cpu, vm, i));
5104 return;
5105
5106 case 1:
5107 for (i = 0; i < (full ? 8 : 4); i++)
5108 aarch64_set_vec_u16 (cpu, vd, i,
5109 aarch64_get_vec_u16 (cpu, vn, i)
5110 < aarch64_get_vec_u16 (cpu, vm, i)
5111 ? aarch64_get_vec_u16 (cpu, vn, i)
5112 : aarch64_get_vec_u16 (cpu, vm, i));
5113 return;
5114
5115 case 2:
5116 for (i = 0; i < (full ? 4 : 2); i++)
5117 aarch64_set_vec_u32 (cpu, vd, i,
5118 aarch64_get_vec_u32 (cpu, vn, i)
5119 < aarch64_get_vec_u32 (cpu, vm, i)
5120 ? aarch64_get_vec_u32 (cpu, vn, i)
5121 : aarch64_get_vec_u32 (cpu, vm, i));
5122 return;
5123
5124 case 3:
5125 HALT_UNALLOC;
5126 }
5127 }
5128 else
5129 {
5130 switch (INSTR (23, 22))
5131 {
5132 case 0:
5133 for (i = 0; i < (full ? 16 : 8); i++)
5134 aarch64_set_vec_s8 (cpu, vd, i,
5135 aarch64_get_vec_s8 (cpu, vn, i)
5136 < aarch64_get_vec_s8 (cpu, vm, i)
5137 ? aarch64_get_vec_s8 (cpu, vn, i)
5138 : aarch64_get_vec_s8 (cpu, vm, i));
5139 return;
5140
5141 case 1:
5142 for (i = 0; i < (full ? 8 : 4); i++)
5143 aarch64_set_vec_s16 (cpu, vd, i,
5144 aarch64_get_vec_s16 (cpu, vn, i)
5145 < aarch64_get_vec_s16 (cpu, vm, i)
5146 ? aarch64_get_vec_s16 (cpu, vn, i)
5147 : aarch64_get_vec_s16 (cpu, vm, i));
5148 return;
5149
5150 case 2:
5151 for (i = 0; i < (full ? 4 : 2); i++)
5152 aarch64_set_vec_s32 (cpu, vd, i,
5153 aarch64_get_vec_s32 (cpu, vn, i)
5154 < aarch64_get_vec_s32 (cpu, vm, i)
5155 ? aarch64_get_vec_s32 (cpu, vn, i)
5156 : aarch64_get_vec_s32 (cpu, vm, i));
5157 return;
5158
5159 case 3:
5160 HALT_UNALLOC;
5161 }
5162 }
5163 }
5164
5165 static void
5166 do_vec_sub_long (sim_cpu *cpu)
5167 {
5168 /* instr[31] = 0
5169 instr[30] = lower (0) / upper (1)
5170 instr[29] = signed (0) / unsigned (1)
5171 instr[28,24] = 0 1110
5172 instr[23,22] = size: bytes (00), half (01), word (10)
5173 instr[21] = 1
5174      instr[20,16] = Vm
5175 instr[15,10] = 0010 00
5176 instr[9,5] = Vn
5177 instr[4,0] = V dest. */
5178
5179 unsigned size = INSTR (23, 22);
5180 unsigned vm = INSTR (20, 16);
5181 unsigned vn = INSTR (9, 5);
5182 unsigned vd = INSTR (4, 0);
5183 unsigned bias = 0;
5184 unsigned i;
5185
5186 NYI_assert (28, 24, 0x0E);
5187 NYI_assert (21, 21, 1);
5188 NYI_assert (15, 10, 0x08);
5189
5190 if (size == 3)
5191 HALT_UNALLOC;
5192
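  /* For the xSUBL2 forms the operands are taken from the upper halves
     of the source registers, selected by BIAS; its initial value of 2
     is scaled below to the starting element index for each size.  */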
5193 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5194 switch (INSTR (30, 29))
5195 {
5196 case 2: /* SSUBL2. */
5197 bias = 2;
5198 case 0: /* SSUBL. */
5199 switch (size)
5200 {
5201 case 0:
5202 	  bias *= 4;
5203 for (i = 0; i < 8; i++)
5204 aarch64_set_vec_s16 (cpu, vd, i,
5205 aarch64_get_vec_s8 (cpu, vn, i + bias)
5206 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5207 break;
5208
5209 case 1:
5210 bias *= 2;
5211 for (i = 0; i < 4; i++)
5212 aarch64_set_vec_s32 (cpu, vd, i,
5213 aarch64_get_vec_s16 (cpu, vn, i + bias)
5214 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5215 break;
5216
5217 case 2:
5218 for (i = 0; i < 2; i++)
5219 aarch64_set_vec_s64 (cpu, vd, i,
5220 aarch64_get_vec_s32 (cpu, vn, i + bias)
5221 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5222 break;
5223
5224 default:
5225 HALT_UNALLOC;
5226 }
5227 break;
5228
5229 case 3: /* USUBL2. */
5230 bias = 2;
5231 case 1: /* USUBL. */
5232 switch (size)
5233 {
5234 case 0:
5235 	  bias *= 4;
5236 for (i = 0; i < 8; i++)
5237 aarch64_set_vec_u16 (cpu, vd, i,
5238 aarch64_get_vec_u8 (cpu, vn, i + bias)
5239 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5240 break;
5241
5242 case 1:
5243 bias *= 2;
5244 for (i = 0; i < 4; i++)
5245 aarch64_set_vec_u32 (cpu, vd, i,
5246 aarch64_get_vec_u16 (cpu, vn, i + bias)
5247 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5248 break;
5249
5250 case 2:
5251 for (i = 0; i < 2; i++)
5252 aarch64_set_vec_u64 (cpu, vd, i,
5253 aarch64_get_vec_u32 (cpu, vn, i + bias)
5254 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5255 break;
5256
5257 default:
5258 HALT_UNALLOC;
5259 }
5260 break;
5261 }
5262 }
5263
5264 static void
5265 do_vec_ADDP (sim_cpu *cpu)
5266 {
5267 /* instr[31] = 0
5268 instr[30] = half(0)/full(1)
5269 instr[29,24] = 00 1110
5270 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5271 instr[21] = 1
5272      instr[20,16] = Vm
5273 instr[15,10] = 1011 11
5274 instr[9,5] = Vn
5275 instr[4,0] = V dest. */
5276
5277 FRegister copy_vn;
5278 FRegister copy_vm;
5279 unsigned full = INSTR (30, 30);
5280 unsigned size = INSTR (23, 22);
5281 unsigned vm = INSTR (20, 16);
5282 unsigned vn = INSTR (9, 5);
5283 unsigned vd = INSTR (4, 0);
5284 unsigned i, range;
5285
5286 NYI_assert (29, 24, 0x0E);
5287 NYI_assert (21, 21, 1);
5288 NYI_assert (15, 10, 0x2F);
5289
5290 /* Make copies of the source registers in case vd == vn/vm. */
5291 copy_vn = cpu->fr[vn];
5292 copy_vm = cpu->fr[vm];
5293
5294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5295 switch (size)
5296 {
5297 case 0:
5298 range = full ? 8 : 4;
5299 for (i = 0; i < range; i++)
5300 {
5301 aarch64_set_vec_u8 (cpu, vd, i,
5302 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5303 aarch64_set_vec_u8 (cpu, vd, i + range,
5304 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5305 }
5306 return;
5307
5308 case 1:
5309 range = full ? 4 : 2;
5310 for (i = 0; i < range; i++)
5311 {
5312 aarch64_set_vec_u16 (cpu, vd, i,
5313 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5314 aarch64_set_vec_u16 (cpu, vd, i + range,
5315 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5316 }
5317 return;
5318
5319 case 2:
5320 range = full ? 2 : 1;
5321 for (i = 0; i < range; i++)
5322 {
5323 aarch64_set_vec_u32 (cpu, vd, i,
5324 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5325 aarch64_set_vec_u32 (cpu, vd, i + range,
5326 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5327 }
5328 return;
5329
5330 case 3:
5331 if (! full)
5332 HALT_UNALLOC;
5333 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5334 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5335 return;
5336 }
5337 }
5338
5339 static void
5340 do_vec_UMOV (sim_cpu *cpu)
5341 {
5342 /* instr[31] = 0
5343 instr[30] = 32-bit(0)/64-bit(1)
5344 instr[29,21] = 00 1110 000
5345      instr[20,16] = size & index
5346 instr[15,10] = 0011 11
5347 instr[9,5] = V source
5348 instr[4,0] = R dest. */
5349
5350 unsigned vs = INSTR (9, 5);
5351 unsigned rd = INSTR (4, 0);
5352 unsigned index;
5353
5354 NYI_assert (29, 21, 0x070);
5355 NYI_assert (15, 10, 0x0F);
5356
5357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
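  /* The imm5 field (instr[20,16]) encodes both the element size, via
     the position of its lowest set bit, and the element index in the
     bits above that bit.  */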
5358 if (INSTR (16, 16))
5359 {
5360 /* Byte transfer. */
5361 index = INSTR (20, 17);
5362 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5363 aarch64_get_vec_u8 (cpu, vs, index));
5364 }
5365 else if (INSTR (17, 17))
5366 {
5367 index = INSTR (20, 18);
5368 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5369 aarch64_get_vec_u16 (cpu, vs, index));
5370 }
5371 else if (INSTR (18, 18))
5372 {
5373 index = INSTR (20, 19);
5374 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5375 aarch64_get_vec_u32 (cpu, vs, index));
5376 }
5377 else
5378 {
5379 if (INSTR (30, 30) != 1)
5380 HALT_UNALLOC;
5381
5382 index = INSTR (20, 20);
5383 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5384 aarch64_get_vec_u64 (cpu, vs, index));
5385 }
5386 }
5387
5388 static void
5389 do_vec_FABS (sim_cpu *cpu)
5390 {
5391 /* instr[31] = 0
5392 instr[30] = half(0)/full(1)
5393 instr[29,23] = 00 1110 1
5394 instr[22] = float(0)/double(1)
5395 instr[21,16] = 10 0000
5396 instr[15,10] = 1111 10
5397 instr[9,5] = Vn
5398 instr[4,0] = Vd. */
5399
5400 unsigned vn = INSTR (9, 5);
5401 unsigned vd = INSTR (4, 0);
5402 unsigned full = INSTR (30, 30);
5403 unsigned i;
5404
5405 NYI_assert (29, 23, 0x1D);
5406 NYI_assert (21, 10, 0x83E);
5407
5408 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5409 if (INSTR (22, 22))
5410 {
5411 if (! full)
5412 HALT_NYI;
5413
5414 for (i = 0; i < 2; i++)
5415 aarch64_set_vec_double (cpu, vd, i,
5416 fabs (aarch64_get_vec_double (cpu, vn, i)));
5417 }
5418 else
5419 {
5420 for (i = 0; i < (full ? 4 : 2); i++)
5421 aarch64_set_vec_float (cpu, vd, i,
5422 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5423 }
5424 }
5425
5426 static void
5427 do_vec_FCVTZS (sim_cpu *cpu)
5428 {
5429 /* instr[31] = 0
5430 instr[30] = half (0) / all (1)
5431 instr[29,23] = 00 1110 1
5432 instr[22] = single (0) / double (1)
5433 instr[21,10] = 10 0001 1011 10
5434 instr[9,5] = Rn
5435 instr[4,0] = Rd. */
5436
5437 unsigned rn = INSTR (9, 5);
5438 unsigned rd = INSTR (4, 0);
5439 unsigned full = INSTR (30, 30);
5440 unsigned i;
5441
5442 NYI_assert (31, 31, 0);
5443 NYI_assert (29, 23, 0x1D);
5444 NYI_assert (21, 10, 0x86E);
5445
5446 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5447 if (INSTR (22, 22))
5448 {
5449 if (! full)
5450 HALT_UNALLOC;
5451
5452 for (i = 0; i < 2; i++)
5453 aarch64_set_vec_s64 (cpu, rd, i,
5454 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5455 }
5456 else
5457 for (i = 0; i < (full ? 4 : 2); i++)
5458 aarch64_set_vec_s32 (cpu, rd, i,
5459 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5460 }
5461
5462 static void
5463 do_vec_REV64 (sim_cpu *cpu)
5464 {
5465 /* instr[31] = 0
5466 instr[30] = full/half
5467 instr[29,24] = 00 1110
5468 instr[23,22] = size
5469 instr[21,10] = 10 0000 0000 10
5470 instr[9,5] = Rn
5471 instr[4,0] = Rd. */
5472
5473 unsigned rn = INSTR (9, 5);
5474 unsigned rd = INSTR (4, 0);
5475 unsigned size = INSTR (23, 22);
5476 unsigned full = INSTR (30, 30);
5477 unsigned i;
5478 FRegister val;
5479
5480 NYI_assert (29, 24, 0x0E);
5481 NYI_assert (21, 10, 0x802);
5482
5483 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5484 switch (size)
5485 {
5486 case 0:
5487 for (i = 0; i < (full ? 16 : 8); i++)
5488 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5489 break;
5490
5491 case 1:
5492 for (i = 0; i < (full ? 8 : 4); i++)
5493 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5494 break;
5495
5496 case 2:
5497 for (i = 0; i < (full ? 4 : 2); i++)
5498 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5499 break;
5500
5501 case 3:
5502 HALT_UNALLOC;
5503 }
5504
5505 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5506 if (full)
5507 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5508 }
5509
5510 static void
5511 do_vec_REV16 (sim_cpu *cpu)
5512 {
5513 /* instr[31] = 0
5514 instr[30] = full/half
5515 instr[29,24] = 00 1110
5516 instr[23,22] = size
5517 instr[21,10] = 10 0000 0001 10
5518 instr[9,5] = Rn
5519 instr[4,0] = Rd. */
5520
5521 unsigned rn = INSTR (9, 5);
5522 unsigned rd = INSTR (4, 0);
5523 unsigned size = INSTR (23, 22);
5524 unsigned full = INSTR (30, 30);
5525 unsigned i;
5526 FRegister val;
5527
5528 NYI_assert (29, 24, 0x0E);
5529 NYI_assert (21, 10, 0x806);
5530
5531 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5532 switch (size)
5533 {
5534 case 0:
5535 for (i = 0; i < (full ? 16 : 8); i++)
5536 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5537 break;
5538
5539 default:
5540 HALT_UNALLOC;
5541 }
5542
5543 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5544 if (full)
5545 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5546 }
5547
5548 static void
5549 do_vec_op1 (sim_cpu *cpu)
5550 {
5551 /* instr[31] = 0
5552 instr[30] = half/full
5553 instr[29,24] = 00 1110
5554 instr[23,21] = ???
5555 instr[20,16] = Vm
5556 instr[15,10] = sub-opcode
5557 instr[9,5] = Vn
5558 instr[4,0] = Vd */
5559 NYI_assert (29, 24, 0x0E);
5560
5561 if (INSTR (21, 21) == 0)
5562 {
5563 if (INSTR (23, 22) == 0)
5564 {
5565 if (INSTR (30, 30) == 1
5566 && INSTR (17, 14) == 0
5567 && INSTR (12, 10) == 7)
5568 return do_vec_ins_2 (cpu);
5569
5570 switch (INSTR (15, 10))
5571 {
5572 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5573 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5574 case 0x07: do_vec_INS (cpu); return;
5575 case 0x0A: do_vec_TRN (cpu); return;
5576
5577 case 0x0F:
5578 if (INSTR (17, 16) == 0)
5579 {
5580 do_vec_MOV_into_scalar (cpu);
5581 return;
5582 }
5583 break;
5584
5585 case 0x00:
5586 case 0x08:
5587 case 0x10:
5588 case 0x18:
5589 do_vec_TBL (cpu); return;
5590
5591 case 0x06:
5592 case 0x16:
5593 do_vec_UZP (cpu); return;
5594
5595 case 0x0E:
5596 case 0x1E:
5597 do_vec_ZIP (cpu); return;
5598
5599 default:
5600 HALT_NYI;
5601 }
5602 }
5603
5604 switch (INSTR (13, 10))
5605 {
5606 case 0x6: do_vec_UZP (cpu); return;
5607 case 0xE: do_vec_ZIP (cpu); return;
5608 case 0xA: do_vec_TRN (cpu); return;
5609 case 0xF: do_vec_UMOV (cpu); return;
5610 default: HALT_NYI;
5611 }
5612 }
5613
5614 switch (INSTR (15, 10))
5615 {
5616 case 0x02: do_vec_REV64 (cpu); return;
5617 case 0x06: do_vec_REV16 (cpu); return;
5618
5619 case 0x07:
5620 switch (INSTR (23, 21))
5621 {
5622 case 1: do_vec_AND (cpu); return;
5623 case 3: do_vec_BIC (cpu); return;
5624 case 5: do_vec_ORR (cpu); return;
5625 case 7: do_vec_ORN (cpu); return;
5626 default: HALT_NYI;
5627 }
5628
5629 case 0x08: do_vec_sub_long (cpu); return;
5630 case 0x0a: do_vec_XTN (cpu); return;
5631 case 0x11: do_vec_SSHL (cpu); return;
5632 case 0x19: do_vec_max (cpu); return;
5633 case 0x1B: do_vec_min (cpu); return;
5634 case 0x21: do_vec_add (cpu); return;
5635 case 0x25: do_vec_MLA (cpu); return;
5636 case 0x27: do_vec_mul (cpu); return;
5637 case 0x2F: do_vec_ADDP (cpu); return;
5638 case 0x30: do_vec_mull (cpu); return;
5639 case 0x33: do_vec_FMLA (cpu); return;
5640 case 0x35: do_vec_fadd (cpu); return;
5641
5642 case 0x2E:
5643 switch (INSTR (20, 16))
5644 {
5645 case 0x00: do_vec_ABS (cpu); return;
5646 case 0x01: do_vec_FCVTZS (cpu); return;
5647 case 0x11: do_vec_ADDV (cpu); return;
5648 default: HALT_NYI;
5649 }
5650
5651 case 0x31:
5652 case 0x3B:
5653 do_vec_Fminmax (cpu); return;
5654
5655 case 0x0D:
5656 case 0x0F:
5657 case 0x22:
5658 case 0x23:
5659 case 0x26:
5660 case 0x2A:
5661 case 0x32:
5662 case 0x36:
5663 case 0x39:
5664 case 0x3A:
5665 do_vec_compare (cpu); return;
5666
5667 case 0x3E:
5668 do_vec_FABS (cpu); return;
5669
5670 default:
5671 HALT_NYI;
5672 }
5673 }
5674
5675 static void
5676 do_vec_xtl (sim_cpu *cpu)
5677 {
5678 /* instr[31] = 0
5679 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5680 instr[28,22] = 0 1111 00
5681 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5682 instr[15,10] = 1010 01
5683 instr[9,5] = V source
5684 instr[4,0] = V dest. */
5685
5686 unsigned vs = INSTR (9, 5);
5687 unsigned vd = INSTR (4, 0);
5688 unsigned i, shift, bias = 0;
5689
5690 NYI_assert (28, 22, 0x3C);
5691 NYI_assert (15, 10, 0x29);
5692
5693 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5694 switch (INSTR (30, 29))
5695 {
5696 case 2: /* SXTL2, SSHLL2. */
5697 bias = 2;
5698 case 0: /* SXTL, SSHLL. */
5699 if (INSTR (21, 21))
5700 {
5701 int64_t val1, val2;
5702
5703 shift = INSTR (20, 16);
5704 /* Get the source values before setting the destination values
5705 in case the source and destination are the same. */
5706 	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5707 	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5708 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5709 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5710 }
5711 else if (INSTR (20, 20))
5712 {
5713 int32_t v[4];
5715
5716 shift = INSTR (19, 16);
5717 bias *= 2;
5718 for (i = 0; i < 4; i++)
5719 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5720 for (i = 0; i < 4; i++)
5721 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5722 }
5723 else
5724 {
5725 int16_t v[8];
5726 NYI_assert (19, 19, 1);
5727
5728 shift = INSTR (18, 16);
5729 	  bias *= 4;
5730 for (i = 0; i < 8; i++)
5731 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5732 for (i = 0; i < 8; i++)
5733 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5734 }
5735 return;
5736
5737 case 3: /* UXTL2, USHLL2. */
5738 bias = 2;
5739 case 1: /* UXTL, USHLL. */
5740 if (INSTR (21, 21))
5741 {
5742 uint64_t v1, v2;
5743 shift = INSTR (20, 16);
5744 	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5745 	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5746 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5747 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5748 }
5749 else if (INSTR (20, 20))
5750 {
5751 uint32_t v[4];
5752 shift = INSTR (19, 16);
5753 bias *= 2;
5754 for (i = 0; i < 4; i++)
5755 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5756 for (i = 0; i < 4; i++)
5757 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5758 }
5759 else
5760 {
5761 uint16_t v[8];
5762 NYI_assert (19, 19, 1);
5763
5764 shift = INSTR (18, 16);
5765 	  bias *= 4;
5766 for (i = 0; i < 8; i++)
5767 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5768 for (i = 0; i < 8; i++)
5769 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5770 }
5771 return;
5772 }
5773 }
5774
5775 static void
5776 do_vec_SHL (sim_cpu *cpu)
5777 {
5778 /* instr [31] = 0
5779 instr [30] = half(0)/full(1)
5780 instr [29,23] = 001 1110
5781 instr [22,16] = size and shift amount
5782 instr [15,10] = 01 0101
5783 instr [9, 5] = Vs
5784 instr [4, 0] = Vd. */
5785
5786 int shift;
5787 int full = INSTR (30, 30);
5788 unsigned vs = INSTR (9, 5);
5789 unsigned vd = INSTR (4, 0);
5790 unsigned i;
5791
5792 NYI_assert (29, 23, 0x1E);
5793 NYI_assert (15, 10, 0x15);
5794
5795 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
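  /* The immh:immb field (instr[22,16]) encodes esize + shift: the
     highest set bit of immh gives the element size, and the bits
     below it give the shift amount directly, which is what the
     masking below extracts.  */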
5796 if (INSTR (22, 22))
5797 {
5798 shift = INSTR (21, 16);
5799
5800 if (full == 0)
5801 HALT_UNALLOC;
5802
5803 for (i = 0; i < 2; i++)
5804 {
5805 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5806 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5807 }
5808
5809 return;
5810 }
5811
5812 if (INSTR (21, 21))
5813 {
5814 shift = INSTR (20, 16);
5815
5816 for (i = 0; i < (full ? 4 : 2); i++)
5817 {
5818 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5819 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5820 }
5821
5822 return;
5823 }
5824
5825 if (INSTR (20, 20))
5826 {
5827 shift = INSTR (19, 16);
5828
5829 for (i = 0; i < (full ? 8 : 4); i++)
5830 {
5831 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5832 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5833 }
5834
5835 return;
5836 }
5837
5838 if (INSTR (19, 19) == 0)
5839 HALT_UNALLOC;
5840
5841 shift = INSTR (18, 16);
5842
5843 for (i = 0; i < (full ? 16 : 8); i++)
5844 {
5845 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5846 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5847 }
5848 }
5849
5850 static void
5851 do_vec_SSHR_USHR (sim_cpu *cpu)
5852 {
5853 /* instr [31] = 0
5854 instr [30] = half(0)/full(1)
5855 instr [29] = signed(0)/unsigned(1)
5856 instr [28,23] = 0 1111 0
5857 instr [22,16] = size and shift amount
5858 instr [15,10] = 0000 01
5859 instr [9, 5] = Vs
5860 instr [4, 0] = Vd. */
5861
5862 int full = INSTR (30, 30);
5863 int sign = ! INSTR (29, 29);
5864 unsigned shift = INSTR (22, 16);
5865 unsigned vs = INSTR (9, 5);
5866 unsigned vd = INSTR (4, 0);
5867 unsigned i;
5868
5869 NYI_assert (28, 23, 0x1E);
5870 NYI_assert (15, 10, 0x01);
5871
5872 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
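  /* For right shifts immh:immb (instr[22,16]) encodes
     (2 * esize) - shift, with the element size given by the highest
     set bit of immh; e.g. for 64-bit elements the raw field is
     128 - shift, which the code below inverts.  */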
5873 if (INSTR (22, 22))
5874 {
5875 shift = 128 - shift;
5876
5877 if (full == 0)
5878 HALT_UNALLOC;
5879
5880 if (sign)
5881 for (i = 0; i < 2; i++)
5882 {
5883 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5884 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5885 }
5886 else
5887 for (i = 0; i < 2; i++)
5888 {
5889 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5890 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5891 }
5892
5893 return;
5894 }
5895
5896 if (INSTR (21, 21))
5897 {
5898 shift = 64 - shift;
5899
5900 if (sign)
5901 for (i = 0; i < (full ? 4 : 2); i++)
5902 {
5903 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5904 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5905 }
5906 else
5907 for (i = 0; i < (full ? 4 : 2); i++)
5908 {
5909 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5910 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5911 }
5912
5913 return;
5914 }
5915
5916 if (INSTR (20, 20))
5917 {
5918 shift = 32 - shift;
5919
5920 if (sign)
5921 for (i = 0; i < (full ? 8 : 4); i++)
5922 {
5923 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5924 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5925 }
5926 else
5927 for (i = 0; i < (full ? 8 : 4); i++)
5928 {
5929 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5930 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5931 }
5932
5933 return;
5934 }
5935
5936 if (INSTR (19, 19) == 0)
5937 HALT_UNALLOC;
5938
5939 shift = 16 - shift;
5940
5941 if (sign)
5942 for (i = 0; i < (full ? 16 : 8); i++)
5943 {
5944 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5945 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5946 }
5947 else
5948 for (i = 0; i < (full ? 16 : 8); i++)
5949 {
5950 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5951 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5952 }
5953 }
5954
5955 static void
5956 do_vec_MUL_by_element (sim_cpu *cpu)
5957 {
5958 /* instr[31] = 0
5959 instr[30] = half/full
5960 instr[29,24] = 00 1111
5961 instr[23,22] = size
5962 instr[21] = L
5963 instr[20] = M
5964 instr[19,16] = m
5965 instr[15,12] = 1000
5966 instr[11] = H
5967 instr[10] = 0
5968 instr[9,5] = Vn
5969 instr[4,0] = Vd */
5970
5971 unsigned full = INSTR (30, 30);
5972 unsigned L = INSTR (21, 21);
5973 unsigned H = INSTR (11, 11);
5974 unsigned vn = INSTR (9, 5);
5975 unsigned vd = INSTR (4, 0);
5976 unsigned size = INSTR (23, 22);
5977 unsigned index;
5978 unsigned vm;
5979 unsigned e;
5980
5981 NYI_assert (29, 24, 0x0F);
5982 NYI_assert (15, 12, 0x8);
5983 NYI_assert (10, 10, 0);
5984
5985 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
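  /* The element index is formed from the H, L and M bits.  For 16-bit
     elements M is part of the index, leaving only a 4-bit register
     number, so Vm is restricted to V0-V15; for 32-bit elements the
     index is just H:L.  */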
5986 switch (size)
5987 {
5988 case 1:
5989 {
5990 /* 16 bit products. */
5991 uint16_t product;
5992 uint16_t element1;
5993 uint16_t element2;
5994
5995 index = (H << 2) | (L << 1) | INSTR (20, 20);
5996 vm = INSTR (19, 16);
5997 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5998
5999 for (e = 0; e < (full ? 8 : 4); e ++)
6000 {
6001 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6002 product = element1 * element2;
6003 aarch64_set_vec_u16 (cpu, vd, e, product);
6004 }
6005 }
6006 break;
6007
6008 case 2:
6009 {
6010 /* 32 bit products. */
6011 uint32_t product;
6012 uint32_t element1;
6013 uint32_t element2;
6014
6015 index = (H << 1) | L;
6016 vm = INSTR (20, 16);
6017 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6018
6019 for (e = 0; e < (full ? 4 : 2); e ++)
6020 {
6021 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6022 product = element1 * element2;
6023 aarch64_set_vec_u32 (cpu, vd, e, product);
6024 }
6025 }
6026 break;
6027
6028 default:
6029 HALT_UNALLOC;
6030 }
6031 }
6032
6033 static void
6034 do_FMLA_by_element (sim_cpu *cpu)
6035 {
6036 /* instr[31] = 0
6037 instr[30] = half/full
6038 instr[29,23] = 00 1111 1
6039 instr[22] = size
6040 instr[21] = L
6041 instr[20,16] = m
6042 instr[15,12] = 0001
6043 instr[11] = H
6044 instr[10] = 0
6045 instr[9,5] = Vn
6046 instr[4,0] = Vd */
6047
6048 unsigned full = INSTR (30, 30);
6049 unsigned size = INSTR (22, 22);
6050 unsigned L = INSTR (21, 21);
6051 unsigned vm = INSTR (20, 16);
6052 unsigned H = INSTR (11, 11);
6053 unsigned vn = INSTR (9, 5);
6054 unsigned vd = INSTR (4, 0);
6055 unsigned e;
6056
6057 NYI_assert (29, 23, 0x1F);
6058 NYI_assert (15, 12, 0x1);
6059 NYI_assert (10, 10, 0);
6060
6061 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6062 if (size)
6063 {
6064 double element1, element2;
6065
6066 if (! full || L)
6067 HALT_UNALLOC;
6068
6069 element2 = aarch64_get_vec_double (cpu, vm, H);
6070
6071 for (e = 0; e < 2; e++)
6072 {
6073 element1 = aarch64_get_vec_double (cpu, vn, e);
6074 element1 *= element2;
6075 element1 += aarch64_get_vec_double (cpu, vd, e);
6076 aarch64_set_vec_double (cpu, vd, e, element1);
6077 }
6078 }
6079 else
6080 {
6081 float element1;
6082 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6083
6084 for (e = 0; e < (full ? 4 : 2); e++)
6085 {
6086 element1 = aarch64_get_vec_float (cpu, vn, e);
6087 element1 *= element2;
6088 element1 += aarch64_get_vec_float (cpu, vd, e);
6089 aarch64_set_vec_float (cpu, vd, e, element1);
6090 }
6091 }
6092 }
6093
6094 static void
6095 do_vec_op2 (sim_cpu *cpu)
6096 {
6097 /* instr[31] = 0
6098 instr[30] = half/full
6099 instr[29,24] = 00 1111
6100 instr[23] = ?
6101 instr[22,16] = element size & index
6102 instr[15,10] = sub-opcode
6103      instr[9,5] = Vn
6104 instr[4,0] = Vd */
6105
6106 NYI_assert (29, 24, 0x0F);
6107
6108 if (INSTR (23, 23) != 0)
6109 {
6110 switch (INSTR (15, 10))
6111 {
6112 case 0x04:
6113 case 0x06:
6114 do_FMLA_by_element (cpu);
6115 return;
6116
6117 case 0x20:
6118 case 0x22:
6119 do_vec_MUL_by_element (cpu);
6120 return;
6121
6122 default:
6123 HALT_NYI;
6124 }
6125 }
6126 else
6127 {
6128 switch (INSTR (15, 10))
6129 {
6130 case 0x01: do_vec_SSHR_USHR (cpu); return;
6131 case 0x15: do_vec_SHL (cpu); return;
6132 case 0x20:
6133 case 0x22: do_vec_MUL_by_element (cpu); return;
6134 case 0x29: do_vec_xtl (cpu); return;
6135 default: HALT_NYI;
6136 }
6137 }
6138 }
6139
6140 static void
6141 do_vec_neg (sim_cpu *cpu)
6142 {
6143 /* instr[31] = 0
6144 instr[30] = full(1)/half(0)
6145 instr[29,24] = 10 1110
6146 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6147 instr[21,10] = 1000 0010 1110
6148 instr[9,5] = Vs
6149 instr[4,0] = Vd */
6150
6151 int full = INSTR (30, 30);
6152 unsigned vs = INSTR (9, 5);
6153 unsigned vd = INSTR (4, 0);
6154 unsigned i;
6155
6156 NYI_assert (29, 24, 0x2E);
6157 NYI_assert (21, 10, 0x82E);
6158
6159 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6160 switch (INSTR (23, 22))
6161 {
6162 case 0:
6163 for (i = 0; i < (full ? 16 : 8); i++)
6164 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6165 return;
6166
6167 case 1:
6168 for (i = 0; i < (full ? 8 : 4); i++)
6169 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6170 return;
6171
6172 case 2:
6173 for (i = 0; i < (full ? 4 : 2); i++)
6174 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6175 return;
6176
6177 case 3:
6178 if (! full)
6179 HALT_NYI;
6180 for (i = 0; i < 2; i++)
6181 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6182 return;
6183 }
6184 }
6185
6186 static void
6187 do_vec_sqrt (sim_cpu *cpu)
6188 {
6189 /* instr[31] = 0
6190 instr[30] = full(1)/half(0)
6191 instr[29,23] = 101 1101
6192 instr[22] = single(0)/double(1)
6193 instr[21,10] = 1000 0111 1110
6194 instr[9,5] = Vs
6195 instr[4,0] = Vd. */
6196
6197 int full = INSTR (30, 30);
6198 unsigned vs = INSTR (9, 5);
6199 unsigned vd = INSTR (4, 0);
6200 unsigned i;
6201
6202 NYI_assert (29, 23, 0x5B);
6203 NYI_assert (21, 10, 0x87E);
6204
6205 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6206 if (INSTR (22, 22) == 0)
6207 for (i = 0; i < (full ? 4 : 2); i++)
6208 aarch64_set_vec_float (cpu, vd, i,
6209 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6210 else
6211 for (i = 0; i < 2; i++)
6212 aarch64_set_vec_double (cpu, vd, i,
6213 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6214 }
6215
6216 static void
6217 do_vec_mls_indexed (sim_cpu *cpu)
6218 {
6219 /* instr[31] = 0
6220 instr[30] = half(0)/full(1)
6221 instr[29,24] = 10 1111
6222 instr[23,22] = 16-bit(01)/32-bit(10)
6223      instr[21,20],instr[11] = index (if 16-bit)
6224      instr[21],instr[11] = index (if 32-bit)
6225 instr[20,16] = Vm
6226 instr[15,12] = 0100
6227 instr[11] = part of index
6228 instr[10] = 0
6229 instr[9,5] = Vs
6230 instr[4,0] = Vd. */
6231
6232 int full = INSTR (30, 30);
6233 unsigned vs = INSTR (9, 5);
6234 unsigned vd = INSTR (4, 0);
6235 unsigned vm = INSTR (20, 16);
6236 unsigned i;
6237
6238 NYI_assert (15, 12, 4);
6239 NYI_assert (10, 10, 0);
6240
6241 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6242 switch (INSTR (23, 22))
6243 {
6244 case 1:
6245 {
6246 unsigned elem;
6247 uint32_t val;
6248
6249 if (vm > 15)
6250 HALT_NYI;
6251
6252 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6253 val = aarch64_get_vec_u16 (cpu, vm, elem);
6254
6255 for (i = 0; i < (full ? 8 : 4); i++)
6256 aarch64_set_vec_u32 (cpu, vd, i,
6257 aarch64_get_vec_u32 (cpu, vd, i) -
6258 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6259 return;
6260 }
6261
6262 case 2:
6263 {
6264 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6265 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6266
6267 for (i = 0; i < (full ? 4 : 2); i++)
6268 aarch64_set_vec_u64 (cpu, vd, i,
6269 aarch64_get_vec_u64 (cpu, vd, i) -
6270 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6271 return;
6272 }
6273
6274 case 0:
6275 case 3:
6276 default:
6277 HALT_NYI;
6278 }
6279 }
6280
6281 static void
6282 do_vec_SUB (sim_cpu *cpu)
6283 {
6284 /* instr [31] = 0
6285 instr [30] = half(0)/full(1)
6286 instr [29,24] = 10 1110
6287      instr [23,22] = size: byte(00), half(01), word (10), long (11)
6288 instr [21] = 1
6289 instr [20,16] = Vm
6290 instr [15,10] = 10 0001
6291 instr [9, 5] = Vn
6292 instr [4, 0] = Vd. */
6293
6294 unsigned full = INSTR (30, 30);
6295 unsigned vm = INSTR (20, 16);
6296 unsigned vn = INSTR (9, 5);
6297 unsigned vd = INSTR (4, 0);
6298 unsigned i;
6299
6300 NYI_assert (29, 24, 0x2E);
6301 NYI_assert (21, 21, 1);
6302 NYI_assert (15, 10, 0x21);
6303
6304 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6305 switch (INSTR (23, 22))
6306 {
6307 case 0:
6308 for (i = 0; i < (full ? 16 : 8); i++)
6309 aarch64_set_vec_s8 (cpu, vd, i,
6310 aarch64_get_vec_s8 (cpu, vn, i)
6311 - aarch64_get_vec_s8 (cpu, vm, i));
6312 return;
6313
6314 case 1:
6315 for (i = 0; i < (full ? 8 : 4); i++)
6316 aarch64_set_vec_s16 (cpu, vd, i,
6317 aarch64_get_vec_s16 (cpu, vn, i)
6318 - aarch64_get_vec_s16 (cpu, vm, i));
6319 return;
6320
6321 case 2:
6322 for (i = 0; i < (full ? 4 : 2); i++)
6323 aarch64_set_vec_s32 (cpu, vd, i,
6324 aarch64_get_vec_s32 (cpu, vn, i)
6325 - aarch64_get_vec_s32 (cpu, vm, i));
6326 return;
6327
6328 case 3:
6329 if (full == 0)
6330 HALT_UNALLOC;
6331
6332 for (i = 0; i < 2; i++)
6333 aarch64_set_vec_s64 (cpu, vd, i,
6334 aarch64_get_vec_s64 (cpu, vn, i)
6335 - aarch64_get_vec_s64 (cpu, vm, i));
6336 return;
6337 }
6338 }
6339
6340 static void
6341 do_vec_MLS (sim_cpu *cpu)
6342 {
6343 /* instr [31] = 0
6344 instr [30] = half(0)/full(1)
6345 instr [29,24] = 10 1110
6346      instr [23,22] = size: byte(00), half(01), word (10)
6347 instr [21] = 1
6348 instr [20,16] = Vm
6349 instr [15,10] = 10 0101
6350 instr [9, 5] = Vn
6351 instr [4, 0] = Vd. */
6352
6353 unsigned full = INSTR (30, 30);
6354 unsigned vm = INSTR (20, 16);
6355 unsigned vn = INSTR (9, 5);
6356 unsigned vd = INSTR (4, 0);
6357 unsigned i;
6358
6359 NYI_assert (29, 24, 0x2E);
6360 NYI_assert (21, 21, 1);
6361 NYI_assert (15, 10, 0x25);
6362
6363 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6364 switch (INSTR (23, 22))
6365 {
6366 case 0:
6367 for (i = 0; i < (full ? 16 : 8); i++)
6368 aarch64_set_vec_u8 (cpu, vd, i,
6369 aarch64_get_vec_u8 (cpu, vd, i)
6370 - (aarch64_get_vec_u8 (cpu, vn, i)
6371 * aarch64_get_vec_u8 (cpu, vm, i)));
6372 return;
6373
6374 case 1:
6375 for (i = 0; i < (full ? 8 : 4); i++)
6376 aarch64_set_vec_u16 (cpu, vd, i,
6377 aarch64_get_vec_u16 (cpu, vd, i)
6378 - (aarch64_get_vec_u16 (cpu, vn, i)
6379 * aarch64_get_vec_u16 (cpu, vm, i)));
6380 return;
6381
6382 case 2:
6383 for (i = 0; i < (full ? 4 : 2); i++)
6384 aarch64_set_vec_u32 (cpu, vd, i,
6385 aarch64_get_vec_u32 (cpu, vd, i)
6386 - (aarch64_get_vec_u32 (cpu, vn, i)
6387 * aarch64_get_vec_u32 (cpu, vm, i)));
6388 return;
6389
6390 default:
6391 HALT_UNALLOC;
6392 }
6393 }
6394
6395 static void
6396 do_vec_FDIV (sim_cpu *cpu)
6397 {
6398 /* instr [31] = 0
6399 instr [30] = half(0)/full(1)
6400 instr [29,23] = 10 1110 0
6401      instr [22]    = float(0)/double(1)
6402 instr [21] = 1
6403 instr [20,16] = Vm
6404 instr [15,10] = 1111 11
6405 instr [9, 5] = Vn
6406 instr [4, 0] = Vd. */
6407
6408 unsigned full = INSTR (30, 30);
6409 unsigned vm = INSTR (20, 16);
6410 unsigned vn = INSTR (9, 5);
6411 unsigned vd = INSTR (4, 0);
6412 unsigned i;
6413
6414 NYI_assert (29, 23, 0x5C);
6415 NYI_assert (21, 21, 1);
6416 NYI_assert (15, 10, 0x3F);
6417
6418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6419 if (INSTR (22, 22))
6420 {
6421 if (! full)
6422 HALT_UNALLOC;
6423
6424 for (i = 0; i < 2; i++)
6425 aarch64_set_vec_double (cpu, vd, i,
6426 aarch64_get_vec_double (cpu, vn, i)
6427 / aarch64_get_vec_double (cpu, vm, i));
6428 }
6429 else
6430 for (i = 0; i < (full ? 4 : 2); i++)
6431 aarch64_set_vec_float (cpu, vd, i,
6432 aarch64_get_vec_float (cpu, vn, i)
6433 / aarch64_get_vec_float (cpu, vm, i));
6434 }
6435
6436 static void
6437 do_vec_FMUL (sim_cpu *cpu)
6438 {
6439 /* instr [31] = 0
6440 instr [30] = half(0)/full(1)
6441 instr [29,23] = 10 1110 0
6442 instr [22] = float(0)/double(1)
6443 instr [21] = 1
6444 instr [20,16] = Vm
6445 instr [15,10] = 1101 11
6446 instr [9, 5] = Vn
6447 instr [4, 0] = Vd. */
6448
6449 unsigned full = INSTR (30, 30);
6450 unsigned vm = INSTR (20, 16);
6451 unsigned vn = INSTR (9, 5);
6452 unsigned vd = INSTR (4, 0);
6453 unsigned i;
6454
6455 NYI_assert (29, 23, 0x5C);
6456 NYI_assert (21, 21, 1);
6457 NYI_assert (15, 10, 0x37);
6458
6459 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6460 if (INSTR (22, 22))
6461 {
6462 if (! full)
6463 HALT_UNALLOC;
6464
6465 for (i = 0; i < 2; i++)
6466 aarch64_set_vec_double (cpu, vd, i,
6467 aarch64_get_vec_double (cpu, vn, i)
6468 * aarch64_get_vec_double (cpu, vm, i));
6469 }
6470 else
6471 for (i = 0; i < (full ? 4 : 2); i++)
6472 aarch64_set_vec_float (cpu, vd, i,
6473 aarch64_get_vec_float (cpu, vn, i)
6474 * aarch64_get_vec_float (cpu, vm, i));
6475 }
6476
6477 static void
6478 do_vec_FADDP (sim_cpu *cpu)
6479 {
6480 /* instr [31] = 0
6481 instr [30] = half(0)/full(1)
6482 instr [29,23] = 10 1110 0
6483 instr [22] = float(0)/double(1)
6484 instr [21] = 1
6485 instr [20,16] = Vm
6486 instr [15,10] = 1101 01
6487 instr [9, 5] = Vn
6488 instr [4, 0] = Vd. */
6489
6490 unsigned full = INSTR (30, 30);
6491 unsigned vm = INSTR (20, 16);
6492 unsigned vn = INSTR (9, 5);
6493 unsigned vd = INSTR (4, 0);
6494
6495 NYI_assert (29, 23, 0x5C);
6496 NYI_assert (21, 21, 1);
6497 NYI_assert (15, 10, 0x35);
6498
6499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6500 if (INSTR (22, 22))
6501 {
6502       /* Extract values before adding them in case vd == vn/vm.  */
6503 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6504 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6505 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6506 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6507
6508 if (! full)
6509 HALT_UNALLOC;
6510
6511 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6512 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6513 }
6514 else
6515 {
6516       /* Extract values before adding them in case vd == vn/vm.  */
6517 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6518 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6519 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6520 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6521
6522 if (full)
6523 {
6524 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6525 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6526 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6527 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6528
6529 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6530 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6531 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6532 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6533 }
6534 else
6535 {
6536 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6537 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6538 }
6539 }
6540 }
6541
6542 static void
6543 do_vec_FSQRT (sim_cpu *cpu)
6544 {
6545 /* instr[31] = 0
6546 instr[30] = half(0)/full(1)
6547 instr[29,23] = 10 1110 1
6548 instr[22] = single(0)/double(1)
6549 instr[21,10] = 10 0001 1111 10
6550 instr[9,5] = Vsrc
6551 instr[4,0] = Vdest. */
6552
6553 unsigned vn = INSTR (9, 5);
6554 unsigned vd = INSTR (4, 0);
6555 unsigned full = INSTR (30, 30);
6556 int i;
6557
6558 NYI_assert (29, 23, 0x5D);
6559 NYI_assert (21, 10, 0x87E);
6560
6561 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6562 if (INSTR (22, 22))
6563 {
6564 if (! full)
6565 HALT_UNALLOC;
6566
6567 for (i = 0; i < 2; i++)
6568 aarch64_set_vec_double (cpu, vd, i,
6569 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6570 }
6571 else
6572 {
6573 for (i = 0; i < (full ? 4 : 2); i++)
6574 aarch64_set_vec_float (cpu, vd, i,
6575 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6576 }
6577 }
6578
6579 static void
6580 do_vec_FNEG (sim_cpu *cpu)
6581 {
6582 /* instr[31] = 0
6583 instr[30] = half (0)/full (1)
6584 instr[29,23] = 10 1110 1
6585 instr[22] = single (0)/double (1)
6586 instr[21,10] = 10 0000 1111 10
6587 instr[9,5] = Vsrc
6588 instr[4,0] = Vdest. */
6589
6590 unsigned vn = INSTR (9, 5);
6591 unsigned vd = INSTR (4, 0);
6592 unsigned full = INSTR (30, 30);
6593 int i;
6594
6595 NYI_assert (29, 23, 0x5D);
6596 NYI_assert (21, 10, 0x83E);
6597
6598 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6599 if (INSTR (22, 22))
6600 {
6601 if (! full)
6602 HALT_UNALLOC;
6603
6604 for (i = 0; i < 2; i++)
6605 aarch64_set_vec_double (cpu, vd, i,
6606 - aarch64_get_vec_double (cpu, vn, i));
6607 }
6608 else
6609 {
6610 for (i = 0; i < (full ? 4 : 2); i++)
6611 aarch64_set_vec_float (cpu, vd, i,
6612 - aarch64_get_vec_float (cpu, vn, i));
6613 }
6614 }
6615
6616 static void
6617 do_vec_NOT (sim_cpu *cpu)
6618 {
6619 /* instr[31] = 0
6620 instr[30] = half (0)/full (1)
6621 instr[29,10] = 10 1110 0010 0000 0101 10
6622 instr[9,5] = Vn
6623      instr[4,0] = Vd.  */
6624
6625 unsigned vn = INSTR (9, 5);
6626 unsigned vd = INSTR (4, 0);
6627 unsigned i;
6628 int full = INSTR (30, 30);
6629
6630 NYI_assert (29, 10, 0xB8816);
6631
6632 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6633 for (i = 0; i < (full ? 16 : 8); i++)
6634 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6635 }
6636
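/* Return the number of leading zero bits in the SIZE least
   significant bits of VAL, or SIZE if VAL is zero.  */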
6637 static unsigned int
6638 clz (uint64_t val, unsigned size)
6639 {
6640 uint64_t mask = 1;
6641 int count;
6642
6643 mask <<= (size - 1);
6644 count = 0;
6645 do
6646 {
6647 if (val & mask)
6648 break;
6649 mask >>= 1;
6650 count ++;
6651 }
6652 while (mask);
6653
6654 return count;
6655 }
6656
6657 static void
6658 do_vec_CLZ (sim_cpu *cpu)
6659 {
6660 /* instr[31] = 0
6661 instr[30] = half (0)/full (1)
6662 instr[29,24] = 10 1110
6663 instr[23,22] = size
6664 instr[21,10] = 10 0000 0100 10
6665 instr[9,5] = Vn
6666      instr[4,0] = Vd.  */
6667
6668 unsigned vn = INSTR (9, 5);
6669 unsigned vd = INSTR (4, 0);
6670 unsigned i;
6671 int full = INSTR (30,30);
6672
6673 NYI_assert (29, 24, 0x2E);
6674 NYI_assert (21, 10, 0x812);
6675
6676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6677 switch (INSTR (23, 22))
6678 {
6679 case 0:
6680 for (i = 0; i < (full ? 16 : 8); i++)
6681 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6682 break;
6683 case 1:
6684 for (i = 0; i < (full ? 8 : 4); i++)
6685 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6686 break;
6687 case 2:
6688 for (i = 0; i < (full ? 4 : 2); i++)
6689 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6690 break;
6691 case 3:
6692 if (! full)
6693 HALT_UNALLOC;
6694 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6695 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6696 break;
6697 }
6698 }
6699
6700 static void
6701 do_vec_MOV_element (sim_cpu *cpu)
6702 {
6703 /* instr[31,21] = 0110 1110 000
6704 instr[20,16] = size & dest index
6705 instr[15] = 0
6706 instr[14,11] = source index
6707 instr[10] = 1
6708 instr[9,5] = Vs
6709      instr[4,0] = Vd.  */
6710
6711 unsigned vs = INSTR (9, 5);
6712 unsigned vd = INSTR (4, 0);
6713 unsigned src_index;
6714 unsigned dst_index;
6715
6716 NYI_assert (31, 21, 0x370);
6717 NYI_assert (15, 15, 0);
6718 NYI_assert (10, 10, 1);
6719
6720 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6721 if (INSTR (16, 16))
6722 {
6723 /* Move a byte. */
6724 src_index = INSTR (14, 11);
6725 dst_index = INSTR (20, 17);
6726 aarch64_set_vec_u8 (cpu, vd, dst_index,
6727 aarch64_get_vec_u8 (cpu, vs, src_index));
6728 }
6729 else if (INSTR (17, 17))
6730 {
6731 /* Move 16-bits. */
6732 NYI_assert (11, 11, 0);
6733 src_index = INSTR (14, 12);
6734 dst_index = INSTR (20, 18);
6735 aarch64_set_vec_u16 (cpu, vd, dst_index,
6736 aarch64_get_vec_u16 (cpu, vs, src_index));
6737 }
6738 else if (INSTR (18, 18))
6739 {
6740 /* Move 32-bits. */
6741 NYI_assert (12, 11, 0);
6742 src_index = INSTR (14, 13);
6743 dst_index = INSTR (20, 19);
6744 aarch64_set_vec_u32 (cpu, vd, dst_index,
6745 aarch64_get_vec_u32 (cpu, vs, src_index));
6746 }
6747 else
6748 {
6749 NYI_assert (19, 19, 1);
6750 NYI_assert (13, 11, 0);
6751 src_index = INSTR (14, 14);
6752 dst_index = INSTR (20, 20);
6753 aarch64_set_vec_u64 (cpu, vd, dst_index,
6754 aarch64_get_vec_u64 (cpu, vs, src_index));
6755 }
6756 }
6757
6758 static void
6759 do_vec_REV32 (sim_cpu *cpu)
6760 {
6761 /* instr[31] = 0
6762 instr[30] = full/half
6763 instr[29,24] = 10 1110
6764 instr[23,22] = size
6765 instr[21,10] = 10 0000 0000 10
6766 instr[9,5] = Rn
6767 instr[4,0] = Rd. */
6768
6769 unsigned rn = INSTR (9, 5);
6770 unsigned rd = INSTR (4, 0);
6771 unsigned size = INSTR (23, 22);
6772 unsigned full = INSTR (30, 30);
6773 unsigned i;
6774 FRegister val;
6775
6776 NYI_assert (29, 24, 0x2E);
6777 NYI_assert (21, 10, 0x802);
6778
6779 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6780 switch (size)
6781 {
6782 case 0:
6783 for (i = 0; i < (full ? 16 : 8); i++)
6784 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6785 break;
6786
6787 case 1:
6788 for (i = 0; i < (full ? 8 : 4); i++)
6789 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6790 break;
6791
6792 default:
6793 HALT_UNALLOC;
6794 }
6795
6796 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6797 if (full)
6798 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6799 }
6800
6801 static void
6802 do_vec_EXT (sim_cpu *cpu)
6803 {
6804 /* instr[31] = 0
6805 instr[30] = full/half
6806 instr[29,21] = 10 1110 000
6807 instr[20,16] = Vm
6808 instr[15] = 0
6809 instr[14,11] = source index
6810 instr[10] = 0
6811 instr[9,5] = Vn
6812      instr[4,0] = Vd.  */
6813
6814 unsigned vm = INSTR (20, 16);
6815 unsigned vn = INSTR (9, 5);
6816 unsigned vd = INSTR (4, 0);
6817 unsigned src_index = INSTR (14, 11);
6818 unsigned full = INSTR (30, 30);
6819 unsigned i;
6820 unsigned j;
6821 FRegister val;
6822
6823 NYI_assert (31, 21, 0x370);
6824 NYI_assert (15, 15, 0);
6825 NYI_assert (10, 10, 0);
6826
6827 if (!full && (src_index & 0x8))
6828 HALT_UNALLOC;
6829
6830 j = 0;
6831
6832 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
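  /* The result is the concatenation Vm:Vn shifted right by src_index
     bytes: take the top bytes of Vn, then the bottom bytes of Vm.  */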
6833 for (i = src_index; i < (full ? 16 : 8); i++)
6834 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6835 for (i = 0; i < src_index; i++)
6836 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6837
6838 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6839 if (full)
6840 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6841 }
6842
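/* Decode and dispatch the AdvSIMD (vector) data-processing
   instruction group.  */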
6843 static void
6844 dexAdvSIMD0 (sim_cpu *cpu)
6845 {
6846 /* instr [28,25] = 0 111. */
6847   if (INSTR (15, 10) == 0x07
6848 && (INSTR (9, 5) ==
6849 INSTR (20, 16)))
6850 {
6851 if (INSTR (31, 21) == 0x075
6852 || INSTR (31, 21) == 0x275)
6853 {
6854 do_vec_MOV_whole_vector (cpu);
6855 return;
6856 }
6857 }
6858
6859 if (INSTR (29, 19) == 0x1E0)
6860 {
6861 do_vec_MOV_immediate (cpu);
6862 return;
6863 }
6864
6865 if (INSTR (29, 19) == 0x5E0)
6866 {
6867 do_vec_MVNI (cpu);
6868 return;
6869 }
6870
6871 if (INSTR (29, 19) == 0x1C0
6872 || INSTR (29, 19) == 0x1C1)
6873 {
6874 if (INSTR (15, 10) == 0x03)
6875 {
6876 do_vec_DUP_scalar_into_vector (cpu);
6877 return;
6878 }
6879 }
6880
6881 switch (INSTR (29, 24))
6882 {
6883 case 0x0E: do_vec_op1 (cpu); return;
6884 case 0x0F: do_vec_op2 (cpu); return;
6885
6886 case 0x2E:
6887 if (INSTR (21, 21) == 1)
6888 {
6889 switch (INSTR (15, 10))
6890 {
6891 case 0x02:
6892 do_vec_REV32 (cpu);
6893 return;
6894
6895 case 0x07:
6896 switch (INSTR (23, 22))
6897 {
6898 case 0: do_vec_EOR (cpu); return;
6899 case 1: do_vec_BSL (cpu); return;
6900 case 2:
6901 case 3: do_vec_bit (cpu); return;
6902 }
6903 break;
6904
6905 case 0x08: do_vec_sub_long (cpu); return;
6906 case 0x11: do_vec_USHL (cpu); return;
6907 case 0x12: do_vec_CLZ (cpu); return;
6908 case 0x16: do_vec_NOT (cpu); return;
6909 case 0x19: do_vec_max (cpu); return;
6910 case 0x1B: do_vec_min (cpu); return;
6911 case 0x21: do_vec_SUB (cpu); return;
6912 case 0x25: do_vec_MLS (cpu); return;
6913 case 0x31: do_vec_FminmaxNMP (cpu); return;
6914 case 0x35: do_vec_FADDP (cpu); return;
6915 case 0x37: do_vec_FMUL (cpu); return;
6916 case 0x3F: do_vec_FDIV (cpu); return;
6917
6918 case 0x3E:
6919 switch (INSTR (20, 16))
6920 {
6921 case 0x00: do_vec_FNEG (cpu); return;
6922 case 0x01: do_vec_FSQRT (cpu); return;
6923 default: HALT_NYI;
6924 }
6925
6926 case 0x0D:
6927 case 0x0F:
6928 case 0x22:
6929 case 0x23:
6930 case 0x26:
6931 case 0x2A:
6932 case 0x32:
6933 case 0x36:
6934 case 0x39:
6935 case 0x3A:
6936 do_vec_compare (cpu); return;
6937
6938 default:
6939 break;
6940 }
6941 }
6942
6943 if (INSTR (31, 21) == 0x370)
6944 {
6945 if (INSTR (10, 10))
6946 do_vec_MOV_element (cpu);
6947 else
6948 do_vec_EXT (cpu);
6949 return;
6950 }
6951
6952 switch (INSTR (21, 10))
6953 {
6954 case 0x82E: do_vec_neg (cpu); return;
6955 case 0x87E: do_vec_sqrt (cpu); return;
6956 default:
6957 if (INSTR (15, 10) == 0x30)
6958 {
6959 do_vec_mull (cpu);
6960 return;
6961 }
6962 break;
6963 }
6964 break;
6965
6966 case 0x2f:
6967 switch (INSTR (15, 10))
6968 {
6969 case 0x01: do_vec_SSHR_USHR (cpu); return;
6970 case 0x10:
6971 case 0x12: do_vec_mls_indexed (cpu); return;
6972 case 0x29: do_vec_xtl (cpu); return;
6973 default:
6974 HALT_NYI;
6975 }
6976
6977 default:
6978 break;
6979 }
6980
6981 HALT_NYI;
6982 }
6983
6984 /* 3 sources. */
6985
6986 /* Float multiply add. */
6987 static void
6988 fmadds (sim_cpu *cpu)
6989 {
6990 unsigned sa = INSTR (14, 10);
6991 unsigned sm = INSTR (20, 16);
6992 unsigned sn = INSTR ( 9, 5);
6993 unsigned sd = INSTR ( 4, 0);
6994
6995 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6996 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6997 + aarch64_get_FP_float (cpu, sn)
6998 * aarch64_get_FP_float (cpu, sm));
6999 }
7000
7001 /* Double multiply add. */
7002 static void
7003 fmaddd (sim_cpu *cpu)
7004 {
7005 unsigned sa = INSTR (14, 10);
7006 unsigned sm = INSTR (20, 16);
7007 unsigned sn = INSTR ( 9, 5);
7008 unsigned sd = INSTR ( 4, 0);
7009
7010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7011 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7012 + aarch64_get_FP_double (cpu, sn)
7013 * aarch64_get_FP_double (cpu, sm));
7014 }
7015
7016 /* Float multiply subtract. */
7017 static void
7018 fmsubs (sim_cpu *cpu)
7019 {
7020 unsigned sa = INSTR (14, 10);
7021 unsigned sm = INSTR (20, 16);
7022 unsigned sn = INSTR ( 9, 5);
7023 unsigned sd = INSTR ( 4, 0);
7024
7025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7026 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7027 - aarch64_get_FP_float (cpu, sn)
7028 * aarch64_get_FP_float (cpu, sm));
7029 }
7030
7031 /* Double multiply subtract. */
7032 static void
7033 fmsubd (sim_cpu *cpu)
7034 {
7035 unsigned sa = INSTR (14, 10);
7036 unsigned sm = INSTR (20, 16);
7037 unsigned sn = INSTR ( 9, 5);
7038 unsigned sd = INSTR ( 4, 0);
7039
7040 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7041 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7042 - aarch64_get_FP_double (cpu, sn)
7043 * aarch64_get_FP_double (cpu, sm));
7044 }
7045
7046 /* Float negative multiply add. */
7047 static void
7048 fnmadds (sim_cpu *cpu)
7049 {
7050 unsigned sa = INSTR (14, 10);
7051 unsigned sm = INSTR (20, 16);
7052 unsigned sn = INSTR ( 9, 5);
7053 unsigned sd = INSTR ( 4, 0);
7054
7055 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7056 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7057 + (- aarch64_get_FP_float (cpu, sn))
7058 * aarch64_get_FP_float (cpu, sm));
7059 }
7060
7061 /* Double negative multiply add. */
7062 static void
7063 fnmaddd (sim_cpu *cpu)
7064 {
7065 unsigned sa = INSTR (14, 10);
7066 unsigned sm = INSTR (20, 16);
7067 unsigned sn = INSTR ( 9, 5);
7068 unsigned sd = INSTR ( 4, 0);
7069
7070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7071 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7072 + (- aarch64_get_FP_double (cpu, sn))
7073 * aarch64_get_FP_double (cpu, sm));
7074 }
7075
7076 /* Float negative multiply subtract. */
7077 static void
7078 fnmsubs (sim_cpu *cpu)
7079 {
7080 unsigned sa = INSTR (14, 10);
7081 unsigned sm = INSTR (20, 16);
7082 unsigned sn = INSTR ( 9, 5);
7083 unsigned sd = INSTR ( 4, 0);
7084
7085 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7086 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7087 + aarch64_get_FP_float (cpu, sn)
7088 * aarch64_get_FP_float (cpu, sm));
7089 }
7090
7091 /* Double negative multiply subtract. */
7092 static void
7093 fnmsubd (sim_cpu *cpu)
7094 {
7095 unsigned sa = INSTR (14, 10);
7096 unsigned sm = INSTR (20, 16);
7097 unsigned sn = INSTR ( 9, 5);
7098 unsigned sd = INSTR ( 4, 0);
7099
7100 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7101 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7102 + aarch64_get_FP_double (cpu, sn)
7103 * aarch64_get_FP_double (cpu, sm));
7104 }
7105
7106 static void
7107 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7108 {
7109 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7110 instr[30] = 0
7111 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7112 instr[28,25] = 1111
7113 instr[24] = 1
7114 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7115 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7116 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7117
7118 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7119 /* dispatch on combined type:o1:o2. */
7120 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
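  /* For example, type == 01 (double), o1 == 0 and o2 == 1 give
     dispatch == 5, which selects fmsubd below.  */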
7121
7122 if (M_S != 0)
7123 HALT_UNALLOC;
7124
7125 switch (dispatch)
7126 {
7127 case 0: fmadds (cpu); return;
7128 case 1: fmsubs (cpu); return;
7129 case 2: fnmadds (cpu); return;
7130 case 3: fnmsubs (cpu); return;
7131 case 4: fmaddd (cpu); return;
7132 case 5: fmsubd (cpu); return;
7133 case 6: fnmaddd (cpu); return;
7134 case 7: fnmsubd (cpu); return;
7135 default:
7136 /* type > 1 is currently unallocated. */
7137 HALT_UNALLOC;
7138 }
7139 }
7140
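/* FP <-> fixed-point conversions: not yet implemented.  */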
7141 static void
7142 dexSimpleFPFixedConvert (sim_cpu *cpu)
7143 {
7144 HALT_NYI;
7145 }
7146
7147 static void
7148 dexSimpleFPCondCompare (sim_cpu *cpu)
7149 {
7150 /* instr [31,23] = 0001 1110 0
7151 instr [22] = type
7152 instr [21] = 1
7153 instr [20,16] = Rm
7154 instr [15,12] = condition
7155 instr [11,10] = 01
7156 instr [9,5] = Rn
7157 instr [4] = 0
7158 instr [3,0] = nzcv */
7159
7160 unsigned rm = INSTR (20, 16);
7161 unsigned rn = INSTR (9, 5);
7162
7163 NYI_assert (31, 23, 0x3C);
7164 NYI_assert (11, 10, 0x1);
7165 NYI_assert (4, 4, 0);
7166
7167 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7168 if (! testConditionCode (cpu, INSTR (15, 12)))
7169 {
7170 aarch64_set_CPSR (cpu, INSTR (3, 0));
7171 return;
7172 }
7173
7174 if (INSTR (22, 22))
7175 {
7176 /* Double precision. */
7177 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7178 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7179
7180 /* FIXME: Check for NaNs. */
7181 if (val1 == val2)
7182 aarch64_set_CPSR (cpu, (Z | C));
7183 else if (val1 < val2)
7184 aarch64_set_CPSR (cpu, N);
7185 else /* val1 > val2 */
7186 aarch64_set_CPSR (cpu, C);
7187 }
7188 else
7189 {
7190 /* Single precision. */
7191 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7192 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7193
7194 /* FIXME: Check for NaNs. */
7195 if (val1 == val2)
7196 aarch64_set_CPSR (cpu, (Z | C));
7197 else if (val1 < val2)
7198 aarch64_set_CPSR (cpu, N);
7199 else /* val1 > val2 */
7200 aarch64_set_CPSR (cpu, C);
7201 }
7202 }
7203
7204 /* 2 sources. */
7205
7206 /* Float add. */
7207 static void
7208 fadds (sim_cpu *cpu)
7209 {
7210 unsigned sm = INSTR (20, 16);
7211 unsigned sn = INSTR ( 9, 5);
7212 unsigned sd = INSTR ( 4, 0);
7213
7214 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7215 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7216 + aarch64_get_FP_float (cpu, sm));
7217 }
7218
7219 /* Double add. */
7220 static void
7221 faddd (sim_cpu *cpu)
7222 {
7223 unsigned sm = INSTR (20, 16);
7224 unsigned sn = INSTR ( 9, 5);
7225 unsigned sd = INSTR ( 4, 0);
7226
7227 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7228 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7229 + aarch64_get_FP_double (cpu, sm));
7230 }
7231
7232 /* Float divide. */
7233 static void
7234 fdivs (sim_cpu *cpu)
7235 {
7236 unsigned sm = INSTR (20, 16);
7237 unsigned sn = INSTR ( 9, 5);
7238 unsigned sd = INSTR ( 4, 0);
7239
7240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7241 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7242 / aarch64_get_FP_float (cpu, sm));
7243 }
7244
7245 /* Double divide. */
7246 static void
7247 fdivd (sim_cpu *cpu)
7248 {
7249 unsigned sm = INSTR (20, 16);
7250 unsigned sn = INSTR ( 9, 5);
7251 unsigned sd = INSTR ( 4, 0);
7252
7253 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7254 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7255 / aarch64_get_FP_double (cpu, sm));
7256 }
7257
7258 /* Float multiply. */
7259 static void
7260 fmuls (sim_cpu *cpu)
7261 {
7262 unsigned sm = INSTR (20, 16);
7263 unsigned sn = INSTR ( 9, 5);
7264 unsigned sd = INSTR ( 4, 0);
7265
7266 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7267 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7268 * aarch64_get_FP_float (cpu, sm));
7269 }
7270
7271 /* Double multiply. */
7272 static void
7273 fmuld (sim_cpu *cpu)
7274 {
7275 unsigned sm = INSTR (20, 16);
7276 unsigned sn = INSTR ( 9, 5);
7277 unsigned sd = INSTR ( 4, 0);
7278
7279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7280 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7281 * aarch64_get_FP_double (cpu, sm));
7282 }
7283
7284 /* Float negate and multiply. */
7285 static void
7286 fnmuls (sim_cpu *cpu)
7287 {
7288 unsigned sm = INSTR (20, 16);
7289 unsigned sn = INSTR ( 9, 5);
7290 unsigned sd = INSTR ( 4, 0);
7291
7292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7293 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7294 * aarch64_get_FP_float (cpu, sm)));
7295 }
7296
7297 /* Double negate and multiply. */
7298 static void
7299 fnmuld (sim_cpu *cpu)
7300 {
7301 unsigned sm = INSTR (20, 16);
7302 unsigned sn = INSTR ( 9, 5);
7303 unsigned sd = INSTR ( 4, 0);
7304
7305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7306 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7307 * aarch64_get_FP_double (cpu, sm)));
7308 }
7309
7310 /* Float subtract. */
7311 static void
7312 fsubs (sim_cpu *cpu)
7313 {
7314 unsigned sm = INSTR (20, 16);
7315 unsigned sn = INSTR ( 9, 5);
7316 unsigned sd = INSTR ( 4, 0);
7317
7318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7319 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7320 - aarch64_get_FP_float (cpu, sm));
7321 }
7322
7323 /* Double subtract. */
7324 static void
7325 fsubd (sim_cpu *cpu)
7326 {
7327 unsigned sm = INSTR (20, 16);
7328 unsigned sn = INSTR ( 9, 5);
7329 unsigned sd = INSTR ( 4, 0);
7330
7331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7332 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7333 - aarch64_get_FP_double (cpu, sm));
7334 }
7335
7336 static void
7337 do_FMINNM (sim_cpu *cpu)
7338 {
7339 /* instr[31,23] = 0 0011 1100
7340 instr[22] = float(0)/double(1)
7341 instr[21] = 1
7342 instr[20,16] = Sm
7343 instr[15,10] = 01 1110
7344 instr[9,5] = Sn
7345      instr[4,0]   = Sd  */
7346
7347 unsigned sm = INSTR (20, 16);
7348 unsigned sn = INSTR ( 9, 5);
7349 unsigned sd = INSTR ( 4, 0);
7350
7351 NYI_assert (31, 23, 0x03C);
7352 NYI_assert (15, 10, 0x1E);
7353
7354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7355 if (INSTR (22, 22))
7356 aarch64_set_FP_double (cpu, sd,
7357 dminnm (aarch64_get_FP_double (cpu, sn),
7358 aarch64_get_FP_double (cpu, sm)));
7359 else
7360 aarch64_set_FP_float (cpu, sd,
7361 fminnm (aarch64_get_FP_float (cpu, sn),
7362 aarch64_get_FP_float (cpu, sm)));
7363 }
7364
7365 static void
7366 do_FMAXNM (sim_cpu *cpu)
7367 {
7368 /* instr[31,23] = 0 0011 1100
7369 instr[22] = float(0)/double(1)
7370 instr[21] = 1
7371 instr[20,16] = Sm
7372 instr[15,10] = 01 1010
7373 instr[9,5] = Sn
7374      instr[4,0]   = Sd  */
7375
7376 unsigned sm = INSTR (20, 16);
7377 unsigned sn = INSTR ( 9, 5);
7378 unsigned sd = INSTR ( 4, 0);
7379
7380 NYI_assert (31, 23, 0x03C);
7381 NYI_assert (15, 10, 0x1A);
7382
7383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7384 if (INSTR (22, 22))
7385 aarch64_set_FP_double (cpu, sd,
7386 dmaxnm (aarch64_get_FP_double (cpu, sn),
7387 aarch64_get_FP_double (cpu, sm)));
7388 else
7389 aarch64_set_FP_float (cpu, sd,
7390 fmaxnm (aarch64_get_FP_float (cpu, sn),
7391 aarch64_get_FP_float (cpu, sm)));
7392 }
7393
7394 static void
7395 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7396 {
7397 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7398 instr[30] = 0
7399 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7400 instr[28,25] = 1111
7401 instr[24] = 0
7402 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7403 instr[21] = 1
7404 instr[20,16] = Vm
7405 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7406 0010 ==> FADD, 0011 ==> FSUB,
7407 0100 ==> FMAX, 0101 ==> FMIN
7408 0110 ==> FMAXNM, 0111 ==> FMINNM
7409 1000 ==> FNMUL, ow ==> UNALLOC
7410 instr[11,10] = 10
7411 instr[9,5] = Vn
7412 instr[4,0] = Vd */
7413
7414 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7415 uint32_t type = INSTR (23, 22);
7416 /* Dispatch on opcode. */
7417 uint32_t dispatch = INSTR (15, 12);
7418
7419 if (type > 1)
7420 HALT_UNALLOC;
7421
7422 if (M_S != 0)
7423 HALT_UNALLOC;
7424
7425 if (type)
7426 switch (dispatch)
7427 {
7428 case 0: fmuld (cpu); return;
7429 case 1: fdivd (cpu); return;
7430 case 2: faddd (cpu); return;
7431 case 3: fsubd (cpu); return;
7432 case 6: do_FMAXNM (cpu); return;
7433 case 7: do_FMINNM (cpu); return;
7434 case 8: fnmuld (cpu); return;
7435
7436 /* Have not yet implemented fmax and fmin. */
7437 case 4:
7438 case 5:
7439 HALT_NYI;
7440
7441 default:
7442 HALT_UNALLOC;
7443 }
7444 else /* type == 0 => floats. */
7445 switch (dispatch)
7446 {
7447 case 0: fmuls (cpu); return;
7448 case 1: fdivs (cpu); return;
7449 case 2: fadds (cpu); return;
7450 case 3: fsubs (cpu); return;
7451 case 6: do_FMAXNM (cpu); return;
7452 case 7: do_FMINNM (cpu); return;
7453 case 8: fnmuls (cpu); return;
7454
7455 case 4:
7456 case 5:
7457 HALT_NYI;
7458
7459 default:
7460 HALT_UNALLOC;
7461 }
7462 }
7463
7464 static void
7465 dexSimpleFPCondSelect (sim_cpu *cpu)
7466 {
7467 /* FCSEL
7468 instr[31,23] = 0 0011 1100
7469 instr[22] = 0=>single 1=>double
7470 instr[21] = 1
7471 instr[20,16] = Sm
7472 instr[15,12] = cond
7473 instr[11,10] = 11
7474 instr[9,5] = Sn
7475      instr[4,0]   = Sd  */
7476 unsigned sm = INSTR (20, 16);
7477 unsigned sn = INSTR ( 9, 5);
7478 unsigned sd = INSTR ( 4, 0);
7479 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7480
7481 NYI_assert (31, 23, 0x03C);
7482 NYI_assert (11, 10, 0x3);
7483
7484 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7485 if (INSTR (22, 22))
7486 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7487 : aarch64_get_FP_double (cpu, sm)));
7488 else
7489 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7490 : aarch64_get_FP_float (cpu, sm)));
7491 }
7492
7493 /* Store 32 bit unscaled signed 9 bit. */
7494 static void
7495 fsturs (sim_cpu *cpu, int32_t offset)
7496 {
7497 unsigned int rn = INSTR (9, 5);
7498 unsigned int st = INSTR (4, 0);
7499
7500 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7501   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7502 aarch64_get_vec_u32 (cpu, st, 0));
7503 }
7504
7505 /* Store 64 bit unscaled signed 9 bit. */
7506 static void
7507 fsturd (sim_cpu *cpu, int32_t offset)
7508 {
7509 unsigned int rn = INSTR (9, 5);
7510 unsigned int st = INSTR (4, 0);
7511
7512 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7513   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7514 aarch64_get_vec_u64 (cpu, st, 0));
7515 }
7516
7517 /* Store 128 bit unscaled signed 9 bit. */
7518 static void
7519 fsturq (sim_cpu *cpu, int32_t offset)
7520 {
7521 unsigned int rn = INSTR (9, 5);
7522 unsigned int st = INSTR (4, 0);
7523 FRegister a;
7524
7525 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7526 aarch64_get_FP_long_double (cpu, st, & a);
7527 aarch64_set_mem_long_double (cpu,
7528                                aarch64_get_reg_u64 (cpu, rn, SP_OK)
7529 + offset, a);
7530 }
7531
7532 /* TODO FP move register. */
7533
7534 /* 32 bit fp to fp move register. */
7535 static void
7536 ffmovs (sim_cpu *cpu)
7537 {
7538 unsigned int rn = INSTR (9, 5);
7539 unsigned int st = INSTR (4, 0);
7540
7541 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7542 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7543 }
7544
7545 /* 64 bit fp to fp move register. */
7546 static void
7547 ffmovd (sim_cpu *cpu)
7548 {
7549 unsigned int rn = INSTR (9, 5);
7550 unsigned int st = INSTR (4, 0);
7551
7552 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7553 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7554 }
7555
7556 /* 32 bit GReg to Vec move register. */
7557 static void
7558 fgmovs (sim_cpu *cpu)
7559 {
7560 unsigned int rn = INSTR (9, 5);
7561 unsigned int st = INSTR (4, 0);
7562
7563 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7564 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7565 }
7566
7567 /* 64 bit g to fp move register. */
7568 static void
7569 fgmovd (sim_cpu *cpu)
7570 {
7571 unsigned int rn = INSTR (9, 5);
7572 unsigned int st = INSTR (4, 0);
7573
7574 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7575 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7576 }
7577
7578 /* 32 bit fp to g move register. */
7579 static void
7580 gfmovs (sim_cpu *cpu)
7581 {
7582 unsigned int rn = INSTR (9, 5);
7583 unsigned int st = INSTR (4, 0);
7584
7585 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7586 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7587 }
7588
7589 /* 64 bit fp to g move register. */
7590 static void
7591 gfmovd (sim_cpu *cpu)
7592 {
7593 unsigned int rn = INSTR (9, 5);
7594 unsigned int st = INSTR (4, 0);
7595
7596 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7597 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7598 }
7599
7600 /* FP move immediate
7601
7602 These install an immediate 8 bit value in the target register
7603 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7604 bit exponent. */
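/* For example, imm8 == 0x70 (sign 0, exponent 111, fraction 0000)
   encodes the value 1.0.  */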
7605
7606 static void
7607 fmovs (sim_cpu *cpu)
7608 {
7609 unsigned int sd = INSTR (4, 0);
7610 uint32_t imm = INSTR (20, 13);
7611 float f = fp_immediate_for_encoding_32 (imm);
7612
7613 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7614 aarch64_set_FP_float (cpu, sd, f);
7615 }
7616
7617 static void
7618 fmovd (sim_cpu *cpu)
7619 {
7620 unsigned int sd = INSTR (4, 0);
7621 uint32_t imm = INSTR (20, 13);
7622 double d = fp_immediate_for_encoding_64 (imm);
7623
7624 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7625 aarch64_set_FP_double (cpu, sd, d);
7626 }
7627
7628 static void
7629 dexSimpleFPImmediate (sim_cpu *cpu)
7630 {
7631   /* instr[31,23] == 0 0011 1100
7632 instr[22] == type : single(0)/double(1)
7633 instr[21] == 1
7634 instr[20,13] == imm8
7635 instr[12,10] == 100
7636      instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7637 instr[4,0] == Rd */
7638 uint32_t imm5 = INSTR (9, 5);
7639
7640 NYI_assert (31, 23, 0x3C);
7641
7642 if (imm5 != 0)
7643 HALT_UNALLOC;
7644
7645 if (INSTR (22, 22))
7646 fmovd (cpu);
7647 else
7648 fmovs (cpu);
7649 }
7650
7651 /* TODO specific decode and execute for group Load Store. */
7652
7653 /* TODO FP load/store single register (unscaled offset). */
7654
7655 /* TODO load 8 bit unscaled signed 9 bit. */
7656 /* TODO load 16 bit unscaled signed 9 bit. */
7657
7658 /* Load 32 bit unscaled signed 9 bit. */
7659 static void
7660 fldurs (sim_cpu *cpu, int32_t offset)
7661 {
7662 unsigned int rn = INSTR (9, 5);
7663 unsigned int st = INSTR (4, 0);
7664
7665 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7666 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7667 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7668 }
7669
7670 /* Load 64 bit unscaled signed 9 bit. */
7671 static void
7672 fldurd (sim_cpu *cpu, int32_t offset)
7673 {
7674 unsigned int rn = INSTR (9, 5);
7675 unsigned int st = INSTR (4, 0);
7676
7677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7678 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7679 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7680 }
7681
7682 /* Load 128 bit unscaled signed 9 bit. */
7683 static void
7684 fldurq (sim_cpu *cpu, int32_t offset)
7685 {
7686 unsigned int rn = INSTR (9, 5);
7687 unsigned int st = INSTR (4, 0);
7688 FRegister a;
7689 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7690
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7692 aarch64_get_mem_long_double (cpu, addr, & a);
7693 aarch64_set_FP_long_double (cpu, st, a);
7694 }
7695
7696 /* TODO store 8 bit unscaled signed 9 bit. */
7697 /* TODO store 16 bit unscaled signed 9 bit. */
7698
7699
7700 /* 1 source. */
7701
7702 /* Float absolute value. */
7703 static void
7704 fabss (sim_cpu *cpu)
7705 {
7706 unsigned sn = INSTR (9, 5);
7707 unsigned sd = INSTR (4, 0);
7708 float value = aarch64_get_FP_float (cpu, sn);
7709
7710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7711 aarch64_set_FP_float (cpu, sd, fabsf (value));
7712 }
7713
7714 /* Double absolute value. */
7715 static void
7716 fabcpu (sim_cpu *cpu)
7717 {
7718 unsigned sn = INSTR (9, 5);
7719 unsigned sd = INSTR (4, 0);
7720 double value = aarch64_get_FP_double (cpu, sn);
7721
7722 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7723 aarch64_set_FP_double (cpu, sd, fabs (value));
7724 }
7725
7726 /* Float negative value. */
7727 static void
7728 fnegs (sim_cpu *cpu)
7729 {
7730 unsigned sn = INSTR (9, 5);
7731 unsigned sd = INSTR (4, 0);
7732
7733 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7734 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7735 }
7736
7737 /* Double negative value. */
7738 static void
7739 fnegd (sim_cpu *cpu)
7740 {
7741 unsigned sn = INSTR (9, 5);
7742 unsigned sd = INSTR (4, 0);
7743
7744 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7745 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7746 }
7747
7748 /* Float square root. */
7749 static void
7750 fsqrts (sim_cpu *cpu)
7751 {
7752 unsigned sn = INSTR (9, 5);
7753 unsigned sd = INSTR (4, 0);
7754
7755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7756 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7757 }
7758
7759 /* Double square root. */
7760 static void
7761 fsqrtd (sim_cpu *cpu)
7762 {
7763 unsigned sn = INSTR (9, 5);
7764 unsigned sd = INSTR (4, 0);
7765
7766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7767 aarch64_set_FP_double (cpu, sd,
7768 sqrt (aarch64_get_FP_double (cpu, sn)));
7769 }
7770
7771 /* Convert double to float. */
7772 static void
7773 fcvtds (sim_cpu *cpu)
7774 {
7775 unsigned sn = INSTR (9, 5);
7776 unsigned sd = INSTR (4, 0);
7777
7778 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7779 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7780 }
7781
7782 /* Convert float to double. */
7783 static void
7784 fcvtcpu (sim_cpu *cpu)
7785 {
7786 unsigned sn = INSTR (9, 5);
7787 unsigned sd = INSTR (4, 0);
7788
7789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7790 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7791 }
7792
7793 static void
7794 do_FRINT (sim_cpu *cpu)
7795 {
7796 /* instr[31,23] = 0001 1110 0
7797 instr[22] = single(0)/double(1)
7798 instr[21,18] = 1001
7799 instr[17,15] = rounding mode
7800 instr[14,10] = 10000
7801 instr[9,5] = source
7802 instr[4,0] = dest */
7803
7804 float val;
7805 unsigned rs = INSTR (9, 5);
7806 unsigned rd = INSTR (4, 0);
7807 unsigned int rmode = INSTR (17, 15);
7808
7809 NYI_assert (31, 23, 0x03C);
7810 NYI_assert (21, 18, 0x9);
7811 NYI_assert (14, 10, 0x10);
7812
7813 if (rmode == 6 || rmode == 7)
7814 /* FIXME: Add support for rmode == 6 exactness check. */
7815 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7816
7817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7818 if (INSTR (22, 22))
7819 {
7820 double val = aarch64_get_FP_double (cpu, rs);
7821
7822 switch (rmode)
7823 {
7824 case 0: /* mode N: nearest or even. */
7825 {
7826 double rval = round (val);
7827
7828 	    if (fabs (val - rval) == 0.5)
7829 	      {
7830 		if (fmod (rval, 2.0) != 0.0)
7831 		  rval += (val < 0.0 ? 1.0 : -1.0);
7832 	      }
7833
7834 	    aarch64_set_FP_double (cpu, rd, rval);
7835 return;
7836 }
7837
7838 case 1: /* mode P: towards +inf. */
7839 if (val < 0.0)
7840 aarch64_set_FP_double (cpu, rd, trunc (val));
7841 else
7842 	      aarch64_set_FP_double (cpu, rd, ceil (val));
7843 return;
7844
7845 case 2: /* mode M: towards -inf. */
7846 if (val < 0.0)
7847 	      aarch64_set_FP_double (cpu, rd, floor (val));
7848 else
7849 aarch64_set_FP_double (cpu, rd, trunc (val));
7850 return;
7851
7852 case 3: /* mode Z: towards 0. */
7853 aarch64_set_FP_double (cpu, rd, trunc (val));
7854 return;
7855
7856 case 4: /* mode A: away from 0. */
7857 aarch64_set_FP_double (cpu, rd, round (val));
7858 return;
7859
7860 case 6: /* mode X: use FPCR with exactness check. */
7861 case 7: /* mode I: use FPCR mode. */
7862 HALT_NYI;
7863
7864 default:
7865 HALT_UNALLOC;
7866 }
7867 }
7868
7869 val = aarch64_get_FP_float (cpu, rs);
7870
7871 switch (rmode)
7872 {
7873 case 0: /* mode N: nearest or even. */
7874 {
7875 float rval = roundf (val);
7876
7877 	  if (fabsf (val - rval) == 0.5)
7878 	    {
7879 	      if (fmodf (rval, 2.0f) != 0.0f)
7880 		rval += (val < 0.0f ? 1.0f : -1.0f);
7881 	    }
7882
7883 aarch64_set_FP_float (cpu, rd, rval);
7884 return;
7885 }
7886
7887 case 1: /* mode P: towards +inf. */
7888 if (val < 0.0)
7889 aarch64_set_FP_float (cpu, rd, truncf (val));
7890 else
7891 	      aarch64_set_FP_float (cpu, rd, ceilf (val));
7892 return;
7893
7894 case 2: /* mode M: towards -inf. */
7895 if (val < 0.0)
7896 	      aarch64_set_FP_float (cpu, rd, floorf (val));
7897 	    else
7898 	      aarch64_set_FP_float (cpu, rd, truncf (val));
7899 return;
7900
7901 case 3: /* mode Z: towards 0. */
7902 aarch64_set_FP_float (cpu, rd, truncf (val));
7903 return;
7904
7905 case 4: /* mode A: away from 0. */
7906 aarch64_set_FP_float (cpu, rd, roundf (val));
7907 return;
7908
7909 case 6: /* mode X: use FPCR with exactness check. */
7910 case 7: /* mode I: use FPCR mode. */
7911 HALT_NYI;
7912
7913 default:
7914 HALT_UNALLOC;
7915 }
7916 }
7917
7918 /* Convert half to float. */
7919 static void
7920 do_FCVT_half_to_single (sim_cpu *cpu)
7921 {
7922 unsigned rn = INSTR (9, 5);
7923 unsigned rd = INSTR (4, 0);
7924
7925 NYI_assert (31, 10, 0x7B890);
7926
7927 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7928 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7929 }
7930
7931 /* Convert half to double. */
7932 static void
7933 do_FCVT_half_to_double (sim_cpu *cpu)
7934 {
7935 unsigned rn = INSTR (9, 5);
7936 unsigned rd = INSTR (4, 0);
7937
7938 NYI_assert (31, 10, 0x7B8B0);
7939
7940 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7941 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7942 }
7943
7944 static void
7945 do_FCVT_single_to_half (sim_cpu *cpu)
7946 {
7947 unsigned rn = INSTR (9, 5);
7948 unsigned rd = INSTR (4, 0);
7949
7950 NYI_assert (31, 10, 0x788F0);
7951
7952 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7953 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7954 }
7955
7956 /* Convert double to half. */
7957 static void
7958 do_FCVT_double_to_half (sim_cpu *cpu)
7959 {
7960 unsigned rn = INSTR (9, 5);
7961 unsigned rd = INSTR (4, 0);
7962
7963 NYI_assert (31, 10, 0x798F0);
7964
7965 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7966 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7967 }
7968
7969 static void
7970 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7971 {
7972 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7973 instr[30] = 0
7974 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7975 instr[28,25] = 1111
7976 instr[24] = 0
7977 instr[23,22] ==> type : 00 ==> source is single,
7978 01 ==> source is double
7979 10 ==> UNALLOC
7980 11 ==> UNALLOC or source is half
7981 instr[21] = 1
7982 instr[20,15] ==> opcode : with type 00 or 01
7983 000000 ==> FMOV, 000001 ==> FABS,
7984 000010 ==> FNEG, 000011 ==> FSQRT,
7985 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7986 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7987 001000 ==> FRINTN, 001001 ==> FRINTP,
7988 001010 ==> FRINTM, 001011 ==> FRINTZ,
7989 001100 ==> FRINTA, 001101 ==> UNALLOC
7990 001110 ==> FRINTX, 001111 ==> FRINTI
7991 with type 11
7992 000100 ==> FCVT (half-to-single)
7993 000101 ==> FCVT (half-to-double)
7994 instr[14,10] = 10000. */
7995
7996 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7997 uint32_t type = INSTR (23, 22);
7998 uint32_t opcode = INSTR (20, 15);
7999
8000 if (M_S != 0)
8001 HALT_UNALLOC;
8002
8003 if (type == 3)
8004 {
8005 if (opcode == 4)
8006 do_FCVT_half_to_single (cpu);
8007 else if (opcode == 5)
8008 do_FCVT_half_to_double (cpu);
8009 else
8010 HALT_UNALLOC;
8011 return;
8012 }
8013
8014 if (type == 2)
8015 HALT_UNALLOC;
8016
8017 switch (opcode)
8018 {
8019 case 0:
8020 if (type)
8021 ffmovd (cpu);
8022 else
8023 ffmovs (cpu);
8024 return;
8025
8026 case 1:
8027 if (type)
8028 fabcpu (cpu);
8029 else
8030 fabss (cpu);
8031 return;
8032
8033 case 2:
8034 if (type)
8035 fnegd (cpu);
8036 else
8037 fnegs (cpu);
8038 return;
8039
8040 case 3:
8041 if (type)
8042 fsqrtd (cpu);
8043 else
8044 fsqrts (cpu);
8045 return;
8046
8047 case 4:
8048 if (type)
8049 fcvtds (cpu);
8050 else
8051 HALT_UNALLOC;
8052 return;
8053
8054 case 5:
8055 if (type)
8056 HALT_UNALLOC;
8057 fcvtcpu (cpu);
8058 return;
8059
8060 case 8: /* FRINTN etc. */
8061 case 9:
8062 case 10:
8063 case 11:
8064 case 12:
8065 case 14:
8066 case 15:
8067 do_FRINT (cpu);
8068 return;
8069
8070 case 7:
8071 if (INSTR (22, 22))
8072 do_FCVT_double_to_half (cpu);
8073 else
8074 do_FCVT_single_to_half (cpu);
8075 return;
8076
8077 case 13:
8078 HALT_NYI;
8079
8080 default:
8081 HALT_UNALLOC;
8082 }
8083 }
8084
8085 /* 32 bit signed int to float. */
8086 static void
8087 scvtf32 (sim_cpu *cpu)
8088 {
8089 unsigned rn = INSTR (9, 5);
8090 unsigned sd = INSTR (4, 0);
8091
8092 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8093 aarch64_set_FP_float
8094 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8095 }
8096
8097 /* 64 bit signed int to float.  */
8098 static void
8099 scvtf (sim_cpu *cpu)
8100 {
8101 unsigned rn = INSTR (9, 5);
8102 unsigned sd = INSTR (4, 0);
8103
8104 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8105 aarch64_set_FP_float
8106 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8107 }
8108
8109 /* 32 bit signed int to double. */
8110 static void
8111 scvtd32 (sim_cpu *cpu)
8112 {
8113 unsigned rn = INSTR (9, 5);
8114 unsigned sd = INSTR (4, 0);
8115
8116 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8117 aarch64_set_FP_double
8118 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8119 }
8120
8121 /* 64 bit signed int to double.  */
8122 static void
8123 scvtd (sim_cpu *cpu)
8124 {
8125 unsigned rn = INSTR (9, 5);
8126 unsigned sd = INSTR (4, 0);
8127
8128 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8129 aarch64_set_FP_double
8130 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8131 }
8132
8133 static const float FLOAT_INT_MAX = (float) INT_MAX;
8134 static const float FLOAT_INT_MIN = (float) INT_MIN;
8135 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8136 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8137 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8138 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8139 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8140 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8141
8142 #define UINT_MIN 0
8143 #define ULONG_MIN 0
8144 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8145 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8146 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8147 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8148 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8149 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8150 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8151 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8152
8153 /* Check for FP exception conditions:
8154 NaN raises IO
8155 Infinity raises IO
8156 Out of Range raises IO and IX and saturates value
8157 Denormal raises ID and IX and sets to zero. */
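/* For example, converting 1.0e10f to a 32-bit signed integer is out of
   range: IO and IX are raised and the result saturates to INT_MAX.  */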
8158 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8159 do \
8160 { \
8161 switch (fpclassify (F)) \
8162 { \
8163 case FP_INFINITE: \
8164 case FP_NAN: \
8165 aarch64_set_FPSR (cpu, IO); \
8166 if (signbit (F)) \
8167 VALUE = ITYPE##_MAX; \
8168 else \
8169 VALUE = ITYPE##_MIN; \
8170 break; \
8171 \
8172 case FP_NORMAL: \
8173 if (F >= FTYPE##_##ITYPE##_MAX) \
8174 { \
8175 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8176 VALUE = ITYPE##_MAX; \
8177 } \
8178 else if (F <= FTYPE##_##ITYPE##_MIN) \
8179 { \
8180 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8181 VALUE = ITYPE##_MIN; \
8182 } \
8183 break; \
8184 \
8185 case FP_SUBNORMAL: \
8186 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8187 VALUE = 0; \
8188 break; \
8189 \
8190 default: \
8191 case FP_ZERO: \
8192 VALUE = 0; \
8193 break; \
8194 } \
8195 } \
8196 while (0)
8197
8198 /* 32 bit convert float to signed int truncate towards zero. */
8199 static void
8200 fcvtszs32 (sim_cpu *cpu)
8201 {
8202 unsigned sn = INSTR (9, 5);
8203 unsigned rd = INSTR (4, 0);
8204 /* TODO : check that this rounds toward zero. */
8205 float f = aarch64_get_FP_float (cpu, sn);
8206 int32_t value = (int32_t) f;
8207
8208 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8209
8210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8211 /* Avoid sign extension to 64 bit. */
8212 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8213 }
8214
8215 /* 64 bit convert float to signed int truncate towards zero. */
8216 static void
8217 fcvtszs (sim_cpu *cpu)
8218 {
8219 unsigned sn = INSTR (9, 5);
8220 unsigned rd = INSTR (4, 0);
8221 float f = aarch64_get_FP_float (cpu, sn);
8222 int64_t value = (int64_t) f;
8223
8224 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8225
8226 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8227 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8228 }
8229
8230 /* 32 bit convert double to signed int truncate towards zero. */
8231 static void
8232 fcvtszd32 (sim_cpu *cpu)
8233 {
8234 unsigned sn = INSTR (9, 5);
8235 unsigned rd = INSTR (4, 0);
8236 /* TODO : check that this rounds toward zero. */
8237 double d = aarch64_get_FP_double (cpu, sn);
8238 int32_t value = (int32_t) d;
8239
8240 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8241
8242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8243 /* Avoid sign extension to 64 bit. */
8244 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8245 }
8246
8247 /* 64 bit convert double to signed int truncate towards zero. */
8248 static void
8249 fcvtszd (sim_cpu *cpu)
8250 {
8251 unsigned sn = INSTR (9, 5);
8252 unsigned rd = INSTR (4, 0);
8253 /* TODO : check that this rounds toward zero. */
8254 double d = aarch64_get_FP_double (cpu, sn);
8255 int64_t value;
8256
8257 value = (int64_t) d;
8258
8259 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8260
8261 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8262 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8263 }
8264
8265 static void
8266 do_fcvtzu (sim_cpu *cpu)
8267 {
8268 /* instr[31] = size: 32-bit (0), 64-bit (1)
8269 instr[30,23] = 00111100
8270 instr[22] = type: single (0)/ double (1)
8271      instr[21]    = 0 ==> fixed-point (use precision field), 1 ==> integer
8272 instr[20,16] = 11001
8273 instr[15,10] = precision
8274 instr[9,5] = Rs
8275 instr[4,0] = Rd. */
8276
8277 unsigned rs = INSTR (9, 5);
8278 unsigned rd = INSTR (4, 0);
8279
8280 NYI_assert (30, 23, 0x3C);
8281 NYI_assert (20, 16, 0x19);
8282
8283 if (INSTR (21, 21) != 1)
8284 /* Convert to fixed point. */
8285 HALT_NYI;
8286
8287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8288 if (INSTR (31, 31))
8289 {
8290 /* Convert to unsigned 64-bit integer. */
8291 if (INSTR (22, 22))
8292 {
8293 double d = aarch64_get_FP_double (cpu, rs);
8294 uint64_t value = (uint64_t) d;
8295
8296 /* Do not raise an exception if we have reached ULONG_MAX. */
8297 if (value != (1UL << 63))
8298 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8299
8300 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8301 }
8302 else
8303 {
8304 float f = aarch64_get_FP_float (cpu, rs);
8305 uint64_t value = (uint64_t) f;
8306
8307 /* Do not raise an exception if we have reached ULONG_MAX. */
8308 if (value != (1UL << 63))
8309 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8310
8311 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8312 }
8313 }
8314 else
8315 {
8316 uint32_t value;
8317
8318 /* Convert to unsigned 32-bit integer. */
8319 if (INSTR (22, 22))
8320 {
8321 double d = aarch64_get_FP_double (cpu, rs);
8322
8323 value = (uint32_t) d;
8324 /* Do not raise an exception if we have reached UINT_MAX. */
8325 if (value != (1UL << 31))
8326 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8327 }
8328 else
8329 {
8330 float f = aarch64_get_FP_float (cpu, rs);
8331
8332 value = (uint32_t) f;
8333 /* Do not raise an exception if we have reached UINT_MAX. */
8334 if (value != (1UL << 31))
8335 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8336 }
8337
8338 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8339 }
8340 }
8341
8342 static void
8343 do_UCVTF (sim_cpu *cpu)
8344 {
8345 /* instr[31] = size: 32-bit (0), 64-bit (1)
8346 instr[30,23] = 001 1110 0
8347 instr[22] = type: single (0)/ double (1)
8348      instr[21]    = 0 ==> fixed-point (use precision field), 1 ==> integer
8349 instr[20,16] = 0 0011
8350 instr[15,10] = precision
8351 instr[9,5] = Rs
8352 instr[4,0] = Rd. */
8353
8354 unsigned rs = INSTR (9, 5);
8355 unsigned rd = INSTR (4, 0);
8356
8357 NYI_assert (30, 23, 0x3C);
8358 NYI_assert (20, 16, 0x03);
8359
8360 if (INSTR (21, 21) != 1)
8361 HALT_NYI;
8362
8363 /* FIXME: Add exception raising. */
8364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8365 if (INSTR (31, 31))
8366 {
8367 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8368
8369 if (INSTR (22, 22))
8370 aarch64_set_FP_double (cpu, rd, (double) value);
8371 else
8372 aarch64_set_FP_float (cpu, rd, (float) value);
8373 }
8374 else
8375 {
8376 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8377
8378 if (INSTR (22, 22))
8379 aarch64_set_FP_double (cpu, rd, (double) value);
8380 else
8381 aarch64_set_FP_float (cpu, rd, (float) value);
8382 }
8383 }
8384
8385 static void
8386 float_vector_move (sim_cpu *cpu)
8387 {
8388 /* instr[31,17] == 100 1111 0101 0111
8389 instr[16] ==> direction 0=> to GR, 1=> from GR
8390 instr[15,10] => ???
8391 instr[9,5] ==> source
8392 instr[4,0] ==> dest. */
8393
8394 unsigned rn = INSTR (9, 5);
8395 unsigned rd = INSTR (4, 0);
8396
8397 NYI_assert (31, 17, 0x4F57);
8398
8399 if (INSTR (15, 10) != 0)
8400 HALT_UNALLOC;
8401
8402 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8403 if (INSTR (16, 16))
8404 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8405 else
8406 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8407 }
8408
8409 static void
8410 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8411 {
8412 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8413      instr[30]    = 0
8414 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8415 instr[28,25] = 1111
8416 instr[24] = 0
8417 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8418 instr[21] = 1
8419 instr[20,19] = rmode
8420 instr[18,16] = opcode
8421 instr[15,10] = 10 0000 */
8422
8423 uint32_t rmode_opcode;
8424 uint32_t size_type;
8425 uint32_t type;
8426 uint32_t size;
8427 uint32_t S;
8428
8429 if (INSTR (31, 17) == 0x4F57)
8430 {
8431 float_vector_move (cpu);
8432 return;
8433 }
8434
8435 size = INSTR (31, 31);
8436 S = INSTR (29, 29);
8437 if (S != 0)
8438 HALT_UNALLOC;
8439
8440 type = INSTR (23, 22);
8441 if (type > 1)
8442 HALT_UNALLOC;
8443
8444 rmode_opcode = INSTR (20, 16);
8445 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8446
8447 switch (rmode_opcode)
8448 {
8449 case 2: /* SCVTF. */
8450 switch (size_type)
8451 {
8452 case 0: scvtf32 (cpu); return;
8453 case 1: scvtd32 (cpu); return;
8454 case 2: scvtf (cpu); return;
8455 case 3: scvtd (cpu); return;
8456 }
8457
8458 case 6: /* FMOV GR, Vec. */
8459 switch (size_type)
8460 {
8461 case 0: gfmovs (cpu); return;
8462 case 3: gfmovd (cpu); return;
8463 default: HALT_UNALLOC;
8464 }
8465
8466 case 7: /* FMOV vec, GR. */
8467 switch (size_type)
8468 {
8469 case 0: fgmovs (cpu); return;
8470 case 3: fgmovd (cpu); return;
8471 default: HALT_UNALLOC;
8472 }
8473
8474 case 24: /* FCVTZS. */
8475 switch (size_type)
8476 {
8477 case 0: fcvtszs32 (cpu); return;
8478 case 1: fcvtszd32 (cpu); return;
8479 case 2: fcvtszs (cpu); return;
8480 case 3: fcvtszd (cpu); return;
8481 }
8482
8483 case 25: do_fcvtzu (cpu); return;
8484 case 3: do_UCVTF (cpu); return;
8485
8486 case 0: /* FCVTNS. */
8487 case 1: /* FCVTNU. */
8488 case 4: /* FCVTAS. */
8489 case 5: /* FCVTAU. */
    case 8:			/* FCVTPS.  */
8491 case 9: /* FCVTPU. */
8492 case 16: /* FCVTMS. */
8493 case 17: /* FCVTMU. */
8494 default:
8495 HALT_NYI;
8496 }
8497 }
8498
8499 static void
8500 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8501 {
8502 uint32_t flags;
8503
8504 /* FIXME: Add exception raising. */
8505 if (isnan (fvalue1) || isnan (fvalue2))
8506 flags = C|V;
8507 else if (isinf (fvalue1) && isinf (fvalue2))
8508 {
      /* Subtracting two infinities may give a NaN.  We only need to
	 compare the signs, which we can get from isinf: glibc's isinf
	 returns -1 for -Inf and +1 for +Inf, although ISO C only
	 guarantees a non-zero result for infinities.  */
8511 int result = isinf (fvalue1) - isinf (fvalue2);
8512
8513 if (result == 0)
8514 flags = Z|C;
8515 else if (result < 0)
8516 flags = N;
8517 else /* (result > 0). */
8518 flags = C;
8519 }
8520 else
8521 {
8522 float result = fvalue1 - fvalue2;
8523
8524 if (result == 0.0)
8525 flags = Z|C;
8526 else if (result < 0)
8527 flags = N;
8528 else /* (result > 0). */
8529 flags = C;
8530 }
8531
8532 aarch64_set_CPSR (cpu, flags);
8533 }
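
/* For reference, the settings computed above follow the FCMP rules in
   the ARM ARM (flags written in NZCV order):
     equal            ==> Z|C  (0110)
     value1 < value2  ==> N    (1000)
     value1 > value2  ==> C    (0010)
     unordered (NaN)  ==> C|V  (0011).  */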
8534
8535 static void
8536 fcmps (sim_cpu *cpu)
8537 {
8538 unsigned sm = INSTR (20, 16);
8539 unsigned sn = INSTR ( 9, 5);
8540
8541 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8542 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8543
8544 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8545 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8546 }
8547
8548 /* Float compare to zero -- Invalid Operation exception
8549 only on signaling NaNs. */
8550 static void
8551 fcmpzs (sim_cpu *cpu)
8552 {
8553 unsigned sn = INSTR ( 9, 5);
8554 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8555
8556 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8557 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8558 }
8559
8560 /* Float compare -- Invalid Operation exception on all NaNs. */
8561 static void
8562 fcmpes (sim_cpu *cpu)
8563 {
8564 unsigned sm = INSTR (20, 16);
8565 unsigned sn = INSTR ( 9, 5);
8566
8567 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8568 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8569
8570 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8571 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8572 }
8573
8574 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8575 static void
8576 fcmpzes (sim_cpu *cpu)
8577 {
8578 unsigned sn = INSTR ( 9, 5);
8579 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8580
8581 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8582 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8583 }
8584
8585 static void
8586 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8587 {
8588 uint32_t flags;
8589
8590 /* FIXME: Add exception raising. */
8591 if (isnan (dval1) || isnan (dval2))
8592 flags = C|V;
8593 else if (isinf (dval1) && isinf (dval2))
8594 {
      /* Subtracting two infinities may give a NaN.  We only need to
	 compare the signs, which we can get from isinf: glibc's isinf
	 returns -1 for -Inf and +1 for +Inf, although ISO C only
	 guarantees a non-zero result for infinities.  */
8597 int result = isinf (dval1) - isinf (dval2);
8598
8599 if (result == 0)
8600 flags = Z|C;
8601 else if (result < 0)
8602 flags = N;
8603 else /* (result > 0). */
8604 flags = C;
8605 }
8606 else
8607 {
8608 double result = dval1 - dval2;
8609
8610 if (result == 0.0)
8611 flags = Z|C;
8612 else if (result < 0)
8613 flags = N;
8614 else /* (result > 0). */
8615 flags = C;
8616 }
8617
8618 aarch64_set_CPSR (cpu, flags);
8619 }
8620
8621 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8622 static void
8623 fcmpd (sim_cpu *cpu)
8624 {
8625 unsigned sm = INSTR (20, 16);
8626 unsigned sn = INSTR ( 9, 5);
8627
8628 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8629 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8630
8631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8632 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8633 }
8634
8635 /* Double compare to zero -- Invalid Operation exception
8636 only on signaling NaNs. */
8637 static void
8638 fcmpzd (sim_cpu *cpu)
8639 {
8640 unsigned sn = INSTR ( 9, 5);
8641 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8642
8643 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8644 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8645 }
8646
8647 /* Double compare -- Invalid Operation exception on all NaNs. */
8648 static void
8649 fcmped (sim_cpu *cpu)
8650 {
8651 unsigned sm = INSTR (20, 16);
8652 unsigned sn = INSTR ( 9, 5);
8653
8654 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8655 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8656
8657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8658 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8659 }
8660
8661 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8662 static void
8663 fcmpzed (sim_cpu *cpu)
8664 {
8665 unsigned sn = INSTR ( 9, 5);
8666 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8667
8668 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8669 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8670 }
8671
8672 static void
8673 dexSimpleFPCompare (sim_cpu *cpu)
8674 {
8675 /* assert instr[28,25] == 1111
     instr[30,24,21] = 001, instr[13,10] = 1000
8677 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8678 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8680 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8681 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8682 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8683 ow ==> UNALLOC */
8684 uint32_t dispatch;
8685 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8686 uint32_t type = INSTR (23, 22);
8687 uint32_t op = INSTR (15, 14);
8688 uint32_t op2_2_0 = INSTR (2, 0);
8689
8690 if (op2_2_0 != 0)
8691 HALT_UNALLOC;
8692
8693 if (M_S != 0)
8694 HALT_UNALLOC;
8695
8696 if (type > 1)
8697 HALT_UNALLOC;
8698
8699 if (op != 0)
8700 HALT_UNALLOC;
8701
8702 /* dispatch on type and top 2 bits of opcode. */
8703 dispatch = (type << 2) | INSTR (4, 3);
8704
8705 switch (dispatch)
8706 {
8707 case 0: fcmps (cpu); return;
8708 case 1: fcmpzs (cpu); return;
8709 case 2: fcmpes (cpu); return;
8710 case 3: fcmpzes (cpu); return;
8711 case 4: fcmpd (cpu); return;
8712 case 5: fcmpzd (cpu); return;
8713 case 6: fcmped (cpu); return;
8714 case 7: fcmpzed (cpu); return;
8715 }
8716 }
8717
8718 static void
8719 do_scalar_FADDP (sim_cpu *cpu)
8720 {
8721 /* instr [31,23] = 0111 1110 0
8722 instr [22] = single(0)/double(1)
8723 instr [21,10] = 11 0000 1101 10
8724 instr [9,5] = Fn
8725 instr [4,0] = Fd. */
8726
8727 unsigned Fn = INSTR (9, 5);
8728 unsigned Fd = INSTR (4, 0);
8729
8730 NYI_assert (31, 23, 0x0FC);
8731 NYI_assert (21, 10, 0xC36);
8732
8733 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8734 if (INSTR (22, 22))
8735 {
8736 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8737 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8738
8739 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8740 }
8741 else
8742 {
8743 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8744 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8745
8746 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8747 }
8748 }
8749
8750 /* Floating point absolute difference. */
8751
8752 static void
8753 do_scalar_FABD (sim_cpu *cpu)
8754 {
8755 /* instr [31,23] = 0111 1110 1
8756 instr [22] = float(0)/double(1)
8757 instr [21] = 1
8758 instr [20,16] = Rm
8759 instr [15,10] = 1101 01
8760 instr [9, 5] = Rn
8761 instr [4, 0] = Rd. */
8762
8763 unsigned rm = INSTR (20, 16);
8764 unsigned rn = INSTR (9, 5);
8765 unsigned rd = INSTR (4, 0);
8766
8767 NYI_assert (31, 23, 0x0FD);
8768 NYI_assert (21, 21, 1);
8769 NYI_assert (15, 10, 0x35);
8770
8771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8772 if (INSTR (22, 22))
8773 aarch64_set_FP_double (cpu, rd,
8774 fabs (aarch64_get_FP_double (cpu, rn)
8775 - aarch64_get_FP_double (cpu, rm)));
8776 else
8777 aarch64_set_FP_float (cpu, rd,
8778 fabsf (aarch64_get_FP_float (cpu, rn)
8779 - aarch64_get_FP_float (cpu, rm)));
8780 }
8781
8782 static void
8783 do_scalar_CMGT (sim_cpu *cpu)
8784 {
8785 /* instr [31,21] = 0101 1110 111
8786 instr [20,16] = Rm
8787 instr [15,10] = 00 1101
8788 instr [9, 5] = Rn
8789 instr [4, 0] = Rd. */
8790
8791 unsigned rm = INSTR (20, 16);
8792 unsigned rn = INSTR (9, 5);
8793 unsigned rd = INSTR (4, 0);
8794
8795 NYI_assert (31, 21, 0x2F7);
8796 NYI_assert (15, 10, 0x0D);
8797
8798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* CMGT is a signed comparison; CMHI is the unsigned form.  */
  aarch64_set_vec_u64 (cpu, rd, 0,
		       aarch64_get_vec_s64 (cpu, rn, 0) >
		       aarch64_get_vec_s64 (cpu, rm, 0) ? -1L : 0L);
8802 }
8803
8804 static void
8805 do_scalar_USHR (sim_cpu *cpu)
8806 {
8807 /* instr [31,23] = 0111 1111 0
8808 instr [22,16] = shift amount
8809 instr [15,10] = 0000 01
8810 instr [9, 5] = Rn
8811 instr [4, 0] = Rd. */
8812
8813 unsigned amount = 128 - INSTR (22, 16);
8814 unsigned rn = INSTR (9, 5);
8815 unsigned rd = INSTR (4, 0);
8816
8817 NYI_assert (31, 23, 0x0FE);
8818 NYI_assert (15, 10, 0x01);
8819
8820 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8821 aarch64_set_vec_u64 (cpu, rd, 0,
8822 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8823 }
8824
8825 static void
8826 do_scalar_SSHL (sim_cpu *cpu)
8827 {
8828 /* instr [31,21] = 0101 1110 111
8829 instr [20,16] = Rm
8830 instr [15,10] = 0100 01
8831 instr [9, 5] = Rn
8832 instr [4, 0] = Rd. */
8833
8834 unsigned rm = INSTR (20, 16);
8835 unsigned rn = INSTR (9, 5);
8836 unsigned rd = INSTR (4, 0);
8837 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8838
8839 NYI_assert (31, 21, 0x2F7);
8840 NYI_assert (15, 10, 0x11);
8841
8842 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8843 if (shift >= 0)
8844 aarch64_set_vec_s64 (cpu, rd, 0,
8845 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8846 else
8847 aarch64_set_vec_s64 (cpu, rd, 0,
8848 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8849 }
8850
8851 static void
8852 do_scalar_shift (sim_cpu *cpu)
8853 {
8854 /* instr [31,23] = 0101 1111 0
8855 instr [22,16] = shift amount
8856 instr [15,10] = 0101 01 [SHL]
8857 instr [15,10] = 0000 01 [SSHR]
8858 instr [9, 5] = Rn
8859 instr [4, 0] = Rd. */
8860
8861 unsigned rn = INSTR (9, 5);
8862 unsigned rd = INSTR (4, 0);
8863 unsigned amount;
8864
8865 NYI_assert (31, 23, 0x0BE);
8866
8867 if (INSTR (22, 22) == 0)
8868 HALT_UNALLOC;
8869
8870 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8871 switch (INSTR (15, 10))
8872 {
8873 case 0x01: /* SSHR */
8874 amount = 128 - INSTR (22, 16);
8875 aarch64_set_vec_s64 (cpu, rd, 0,
8876 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8877 return;
8878 case 0x15: /* SHL */
8879 amount = INSTR (22, 16) - 64;
8880 aarch64_set_vec_u64 (cpu, rd, 0,
8881 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8882 return;
8883 default:
8884 HALT_NYI;
8885 }
8886 }
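
/* Worked example of the immh:immb shift encoding decoded above: for
   SSHR d0, d1, #1 the field INSTR (22, 16) holds 0x7F, so the amount
   is 128 - 0x7F == 1; for SHL d0, d1, #1 it holds 0x41, so the amount
   is 0x41 - 64 == 1.  do_scalar_USHR decodes its amount the same way
   as SSHR.  */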
8887
8888 /* FCMEQ FCMGT FCMGE. */
8889 static void
8890 do_scalar_FCM (sim_cpu *cpu)
8891 {
8892 /* instr [31,30] = 01
8893 instr [29] = U
8894 instr [28,24] = 1 1110
8895 instr [23] = E
8896 instr [22] = size
8897 instr [21] = 1
8898 instr [20,16] = Rm
8899 instr [15,12] = 1110
8900 instr [11] = AC
8901 instr [10] = 1
8902 instr [9, 5] = Rn
8903 instr [4, 0] = Rd. */
8904
8905 unsigned rm = INSTR (20, 16);
8906 unsigned rn = INSTR (9, 5);
8907 unsigned rd = INSTR (4, 0);
8908 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8909 unsigned result;
8910 float val1;
8911 float val2;
8912
8913 NYI_assert (31, 30, 1);
8914 NYI_assert (28, 24, 0x1E);
8915 NYI_assert (21, 21, 1);
8916 NYI_assert (15, 12, 0xE);
8917 NYI_assert (10, 10, 1);
8918
8919 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8920 if (INSTR (22, 22))
8921 {
8922 double val1 = aarch64_get_FP_double (cpu, rn);
8923 double val2 = aarch64_get_FP_double (cpu, rm);
8924
8925 switch (EUac)
8926 {
8927 case 0: /* 000 */
8928 result = val1 == val2;
8929 break;
8930
8931 case 3: /* 011 */
8932 val1 = fabs (val1);
8933 val2 = fabs (val2);
8934 /* Fall through. */
8935 case 2: /* 010 */
8936 result = val1 >= val2;
8937 break;
8938
8939 case 7: /* 111 */
8940 val1 = fabs (val1);
8941 val2 = fabs (val2);
8942 /* Fall through. */
8943 case 6: /* 110 */
8944 result = val1 > val2;
8945 break;
8946
8947 default:
8948 HALT_UNALLOC;
8949 }
8950
      /* A double compare produces a 64-bit result mask.  */
      aarch64_set_vec_u64 (cpu, rd, 0, result ? -1ULL : 0);
8952 return;
8953 }
8954
8955 val1 = aarch64_get_FP_float (cpu, rn);
8956 val2 = aarch64_get_FP_float (cpu, rm);
8957
8958 switch (EUac)
8959 {
8960 case 0: /* 000 */
8961 result = val1 == val2;
8962 break;
8963
8964 case 3: /* 011 */
8965 val1 = fabsf (val1);
8966 val2 = fabsf (val2);
8967 /* Fall through. */
8968 case 2: /* 010 */
8969 result = val1 >= val2;
8970 break;
8971
8972 case 7: /* 111 */
8973 val1 = fabsf (val1);
8974 val2 = fabsf (val2);
8975 /* Fall through. */
8976 case 6: /* 110 */
8977 result = val1 > val2;
8978 break;
8979
8980 default:
8981 HALT_UNALLOC;
8982 }
8983
8984 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8985 }
8986
8987 /* An alias of DUP. */
8988 static void
8989 do_scalar_MOV (sim_cpu *cpu)
8990 {
8991 /* instr [31,21] = 0101 1110 000
8992 instr [20,16] = imm5
8993 instr [15,10] = 0000 01
8994 instr [9, 5] = Rn
8995 instr [4, 0] = Rd. */
8996
8997 unsigned rn = INSTR (9, 5);
8998 unsigned rd = INSTR (4, 0);
8999 unsigned index;
9000
9001 NYI_assert (31, 21, 0x2F0);
9002 NYI_assert (15, 10, 0x01);
9003
9004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9005 if (INSTR (16, 16))
9006 {
9007 /* 8-bit. */
9008 index = INSTR (20, 17);
9009 aarch64_set_vec_u8
9010 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9011 }
9012 else if (INSTR (17, 17))
9013 {
9014 /* 16-bit. */
9015 index = INSTR (20, 18);
9016 aarch64_set_vec_u16
9017 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9018 }
9019 else if (INSTR (18, 18))
9020 {
9021 /* 32-bit. */
9022 index = INSTR (20, 19);
9023 aarch64_set_vec_u32
9024 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9025 }
9026 else if (INSTR (19, 19))
9027 {
9028 /* 64-bit. */
9029 index = INSTR (20, 20);
9030 aarch64_set_vec_u64
9031 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9032 }
9033 else
9034 HALT_UNALLOC;
9035 }
9036
9037 static void
9038 do_scalar_NEG (sim_cpu *cpu)
9039 {
9040 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9041 instr [9, 5] = Rn
9042 instr [4, 0] = Rd. */
9043
9044 unsigned rn = INSTR (9, 5);
9045 unsigned rd = INSTR (4, 0);
9046
9047 NYI_assert (31, 10, 0x1FB82E);
9048
9049 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9050 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9051 }
9052
9053 static void
9054 do_scalar_USHL (sim_cpu *cpu)
9055 {
9056 /* instr [31,21] = 0111 1110 111
9057 instr [20,16] = Rm
9058 instr [15,10] = 0100 01
9059 instr [9, 5] = Rn
9060 instr [4, 0] = Rd. */
9061
9062 unsigned rm = INSTR (20, 16);
9063 unsigned rn = INSTR (9, 5);
9064 unsigned rd = INSTR (4, 0);
9065 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9066
9067 NYI_assert (31, 21, 0x3F7);
9068 NYI_assert (15, 10, 0x11);
9069
9070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9071 if (shift >= 0)
9072 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9073 else
9074 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9075 }
9076
9077 static void
9078 do_double_add (sim_cpu *cpu)
9079 {
9080 /* instr [31,21] = 0101 1110 111
9081 instr [20,16] = Fn
9082 instr [15,10] = 1000 01
9083 instr [9,5] = Fm
9084 instr [4,0] = Fd. */
9085 unsigned Fd;
9086 unsigned Fm;
9087 unsigned Fn;
9088 double val1;
9089 double val2;
9090
9091 NYI_assert (31, 21, 0x2F7);
9092 NYI_assert (15, 10, 0x21);
9093
9094 Fd = INSTR (4, 0);
9095 Fm = INSTR (9, 5);
9096 Fn = INSTR (20, 16);
9097
9098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9099 val1 = aarch64_get_FP_double (cpu, Fm);
9100 val2 = aarch64_get_FP_double (cpu, Fn);
9101
9102 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9103 }
9104
9105 static void
9106 do_scalar_UCVTF (sim_cpu *cpu)
9107 {
9108 /* instr [31,23] = 0111 1110 0
9109 instr [22] = single(0)/double(1)
9110 instr [21,10] = 10 0001 1101 10
9111 instr [9,5] = rn
9112 instr [4,0] = rd. */
9113
9114 unsigned rn = INSTR (9, 5);
9115 unsigned rd = INSTR (4, 0);
9116
9117 NYI_assert (31, 23, 0x0FC);
9118 NYI_assert (21, 10, 0x876);
9119
9120 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9121 if (INSTR (22, 22))
9122 {
9123 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9124
9125 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9126 }
9127 else
9128 {
9129 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9130
9131 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9132 }
9133 }
9134
9135 static void
9136 do_scalar_vec (sim_cpu *cpu)
9137 {
9138 /* instr [30] = 1. */
9139 /* instr [28,25] = 1111. */
9140 switch (INSTR (31, 23))
9141 {
9142 case 0xBC:
9143 switch (INSTR (15, 10))
9144 {
9145 case 0x01: do_scalar_MOV (cpu); return;
9146 case 0x39: do_scalar_FCM (cpu); return;
9147 case 0x3B: do_scalar_FCM (cpu); return;
9148 }
9149 break;
9150
9151 case 0xBE: do_scalar_shift (cpu); return;
9152
9153 case 0xFC:
9154 switch (INSTR (15, 10))
9155 {
9156 case 0x36:
9157 switch (INSTR (21, 16))
9158 {
9159 case 0x30: do_scalar_FADDP (cpu); return;
9160 case 0x21: do_scalar_UCVTF (cpu); return;
9161 }
9162 HALT_NYI;
9163 case 0x39: do_scalar_FCM (cpu); return;
9164 case 0x3B: do_scalar_FCM (cpu); return;
9165 }
9166 break;
9167
9168 case 0xFD:
9169 switch (INSTR (15, 10))
9170 {
9171 case 0x0D: do_scalar_CMGT (cpu); return;
9172 case 0x11: do_scalar_USHL (cpu); return;
9173 case 0x2E: do_scalar_NEG (cpu); return;
9174 case 0x35: do_scalar_FABD (cpu); return;
9175 case 0x39: do_scalar_FCM (cpu); return;
9176 case 0x3B: do_scalar_FCM (cpu); return;
9177 default:
9178 HALT_NYI;
9179 }
9180
9181 case 0xFE: do_scalar_USHR (cpu); return;
9182
9183 case 0xBD:
9184 switch (INSTR (15, 10))
9185 {
9186 case 0x21: do_double_add (cpu); return;
9187 case 0x11: do_scalar_SSHL (cpu); return;
9188 default:
9189 HALT_NYI;
9190 }
9191
9192 default:
9193 HALT_NYI;
9194 }
9195 }
9196
9197 static void
9198 dexAdvSIMD1 (sim_cpu *cpu)
9199 {
9200 /* instr [28,25] = 1 111. */
9201
9202 /* We are currently only interested in the basic
9203 scalar fp routines which all have bit 30 = 0. */
9204 if (INSTR (30, 30))
9205 do_scalar_vec (cpu);
9206
9207 /* instr[24] is set for FP data processing 3-source and clear for
9208 all other basic scalar fp instruction groups. */
9209 else if (INSTR (24, 24))
9210 dexSimpleFPDataProc3Source (cpu);
9211
9212 /* instr[21] is clear for floating <-> fixed conversions and set for
9213 all other basic scalar fp instruction groups. */
9214 else if (!INSTR (21, 21))
9215 dexSimpleFPFixedConvert (cpu);
9216
9217 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9218 11 ==> cond select, 00 ==> other. */
9219 else
9220 switch (INSTR (11, 10))
9221 {
9222 case 1: dexSimpleFPCondCompare (cpu); return;
9223 case 2: dexSimpleFPDataProc2Source (cpu); return;
9224 case 3: dexSimpleFPCondSelect (cpu); return;
9225
9226 default:
9227 /* Now an ordered cascade of tests.
9228 FP immediate has instr [12] == 1.
9229 FP compare has instr [13] == 1.
9230 FP Data Proc 1 Source has instr [14] == 1.
9231 FP floating <--> integer conversions has instr [15] == 0. */
9232 if (INSTR (12, 12))
9233 dexSimpleFPImmediate (cpu);
9234
9235 else if (INSTR (13, 13))
9236 dexSimpleFPCompare (cpu);
9237
9238 else if (INSTR (14, 14))
9239 dexSimpleFPDataProc1Source (cpu);
9240
9241 else if (!INSTR (15, 15))
9242 dexSimpleFPIntegerConvert (cpu);
9243
9244 else
9245 /* If we get here then instr[15] == 1 which means UNALLOC. */
9246 HALT_UNALLOC;
9247 }
9248 }
9249
9250 /* PC relative addressing. */
9251
9252 static void
9253 pcadr (sim_cpu *cpu)
9254 {
9255 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9256 instr[30,29] = immlo
9257 instr[23,5] = immhi. */
9258 uint64_t address;
9259 unsigned rd = INSTR (4, 0);
9260 uint32_t isPage = INSTR (31, 31);
  union { uint64_t u64; int64_t s64; } imm;
9262 uint64_t offset;
9263
9264 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9265 offset = imm.u64;
9266 offset = (offset << 2) | INSTR (30, 29);
9267
9268 address = aarch64_get_PC (cpu);
9269
9270 if (isPage)
9271 {
9272 offset <<= 12;
9273 address &= ~0xfff;
9274 }
9275
9276 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9277 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9278 }
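
/* Worked example: with PC == 0x400123, ADR with a byte offset of 0x10
   yields 0x400133, while ADRP with immhi:immlo == 1 first masks the PC
   down to 0x400000 and then adds 1 << 12, yielding 0x401000.  */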
9279
9280 /* Specific decode and execute for group Data Processing Immediate. */
9281
9282 static void
9283 dexPCRelAddressing (sim_cpu *cpu)
9284 {
9285 /* assert instr[28,24] = 10000. */
9286 pcadr (cpu);
9287 }
9288
/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */
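
/* A minimal sketch, for illustration only, of the replication step
   described above; the decoder itself uses the pre-computed LITable.
   For example replicate_example (0x1, 16) yields 0x0001000100010001.  */
static inline uint64_t
replicate_example (uint64_t element, unsigned esize)
{
  uint64_t result = 0;
  unsigned i;

  /* Copy the esize-bit element into every esize-bit slice of the
     64 bit result.  */
  for (i = 0; i < 64; i += esize)
    result |= element << i;
  return result;
}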
9298
9299 /* 32 bit and immediate. */
9300 static void
9301 and32 (sim_cpu *cpu, uint32_t bimm)
9302 {
9303 unsigned rn = INSTR (9, 5);
9304 unsigned rd = INSTR (4, 0);
9305
9306 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9307 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9308 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9309 }
9310
9311 /* 64 bit and immediate. */
9312 static void
9313 and64 (sim_cpu *cpu, uint64_t bimm)
9314 {
9315 unsigned rn = INSTR (9, 5);
9316 unsigned rd = INSTR (4, 0);
9317
9318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9319 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9320 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9321 }
9322
9323 /* 32 bit and immediate set flags. */
9324 static void
9325 ands32 (sim_cpu *cpu, uint32_t bimm)
9326 {
9327 unsigned rn = INSTR (9, 5);
9328 unsigned rd = INSTR (4, 0);
9329
9330 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9331 uint32_t value2 = bimm;
9332
9333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9334 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9335 set_flags_for_binop32 (cpu, value1 & value2);
9336 }
9337
9338 /* 64 bit and immediate set flags. */
9339 static void
9340 ands64 (sim_cpu *cpu, uint64_t bimm)
9341 {
9342 unsigned rn = INSTR (9, 5);
9343 unsigned rd = INSTR (4, 0);
9344
9345 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9346 uint64_t value2 = bimm;
9347
9348 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9349 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9350 set_flags_for_binop64 (cpu, value1 & value2);
9351 }
9352
9353 /* 32 bit exclusive or immediate. */
9354 static void
9355 eor32 (sim_cpu *cpu, uint32_t bimm)
9356 {
9357 unsigned rn = INSTR (9, 5);
9358 unsigned rd = INSTR (4, 0);
9359
9360 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9361 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9362 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9363 }
9364
9365 /* 64 bit exclusive or immediate. */
9366 static void
9367 eor64 (sim_cpu *cpu, uint64_t bimm)
9368 {
9369 unsigned rn = INSTR (9, 5);
9370 unsigned rd = INSTR (4, 0);
9371
9372 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9373 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9374 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9375 }
9376
9377 /* 32 bit or immediate. */
9378 static void
9379 orr32 (sim_cpu *cpu, uint32_t bimm)
9380 {
9381 unsigned rn = INSTR (9, 5);
9382 unsigned rd = INSTR (4, 0);
9383
9384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9385 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9386 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9387 }
9388
9389 /* 64 bit or immediate. */
9390 static void
9391 orr64 (sim_cpu *cpu, uint64_t bimm)
9392 {
9393 unsigned rn = INSTR (9, 5);
9394 unsigned rd = INSTR (4, 0);
9395
9396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9397 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9398 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9399 }
9400
9401 /* Logical shifted register.
9402 These allow an optional LSL, ASR, LSR or ROR to the second source
9403 register with a count up to the register bit count.
   N.B. register args may not be SP.  */
9405
9406 /* 32 bit AND shifted register. */
9407 static void
9408 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9409 {
9410 unsigned rm = INSTR (20, 16);
9411 unsigned rn = INSTR (9, 5);
9412 unsigned rd = INSTR (4, 0);
9413
9414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9415 aarch64_set_reg_u64
9416 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9417 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9418 }
9419
9420 /* 64 bit AND shifted register. */
9421 static void
9422 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9423 {
9424 unsigned rm = INSTR (20, 16);
9425 unsigned rn = INSTR (9, 5);
9426 unsigned rd = INSTR (4, 0);
9427
9428 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9429 aarch64_set_reg_u64
9430 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9431 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9432 }
9433
9434 /* 32 bit AND shifted register setting flags. */
9435 static void
9436 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9437 {
9438 unsigned rm = INSTR (20, 16);
9439 unsigned rn = INSTR (9, 5);
9440 unsigned rd = INSTR (4, 0);
9441
9442 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9443 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9444 shift, count);
9445
9446 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9447 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9448 set_flags_for_binop32 (cpu, value1 & value2);
9449 }
9450
9451 /* 64 bit AND shifted register setting flags. */
9452 static void
9453 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9454 {
9455 unsigned rm = INSTR (20, 16);
9456 unsigned rn = INSTR (9, 5);
9457 unsigned rd = INSTR (4, 0);
9458
9459 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9460 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9461 shift, count);
9462
9463 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9464 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9465 set_flags_for_binop64 (cpu, value1 & value2);
9466 }
9467
9468 /* 32 bit BIC shifted register. */
9469 static void
9470 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9471 {
9472 unsigned rm = INSTR (20, 16);
9473 unsigned rn = INSTR (9, 5);
9474 unsigned rd = INSTR (4, 0);
9475
9476 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9477 aarch64_set_reg_u64
9478 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9479 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9480 }
9481
9482 /* 64 bit BIC shifted register. */
9483 static void
9484 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9485 {
9486 unsigned rm = INSTR (20, 16);
9487 unsigned rn = INSTR (9, 5);
9488 unsigned rd = INSTR (4, 0);
9489
9490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9491 aarch64_set_reg_u64
9492 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9493 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9494 }
9495
9496 /* 32 bit BIC shifted register setting flags. */
9497 static void
9498 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9499 {
9500 unsigned rm = INSTR (20, 16);
9501 unsigned rn = INSTR (9, 5);
9502 unsigned rd = INSTR (4, 0);
9503
9504 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9505 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9506 shift, count);
9507
9508 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9509 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9510 set_flags_for_binop32 (cpu, value1 & value2);
9511 }
9512
9513 /* 64 bit BIC shifted register setting flags. */
9514 static void
9515 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9516 {
9517 unsigned rm = INSTR (20, 16);
9518 unsigned rn = INSTR (9, 5);
9519 unsigned rd = INSTR (4, 0);
9520
9521 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9522 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9523 shift, count);
9524
9525 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9526 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9527 set_flags_for_binop64 (cpu, value1 & value2);
9528 }
9529
9530 /* 32 bit EON shifted register. */
9531 static void
9532 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9533 {
9534 unsigned rm = INSTR (20, 16);
9535 unsigned rn = INSTR (9, 5);
9536 unsigned rd = INSTR (4, 0);
9537
9538 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9539 aarch64_set_reg_u64
9540 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9541 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9542 }
9543
9544 /* 64 bit EON shifted register. */
9545 static void
9546 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9547 {
9548 unsigned rm = INSTR (20, 16);
9549 unsigned rn = INSTR (9, 5);
9550 unsigned rd = INSTR (4, 0);
9551
9552 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9553 aarch64_set_reg_u64
9554 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9555 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9556 }
9557
9558 /* 32 bit EOR shifted register. */
9559 static void
9560 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9561 {
9562 unsigned rm = INSTR (20, 16);
9563 unsigned rn = INSTR (9, 5);
9564 unsigned rd = INSTR (4, 0);
9565
9566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9567 aarch64_set_reg_u64
9568 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9569 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9570 }
9571
9572 /* 64 bit EOR shifted register. */
9573 static void
9574 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9575 {
9576 unsigned rm = INSTR (20, 16);
9577 unsigned rn = INSTR (9, 5);
9578 unsigned rd = INSTR (4, 0);
9579
9580 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9581 aarch64_set_reg_u64
9582 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9583 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9584 }
9585
9586 /* 32 bit ORR shifted register. */
9587 static void
9588 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9589 {
9590 unsigned rm = INSTR (20, 16);
9591 unsigned rn = INSTR (9, 5);
9592 unsigned rd = INSTR (4, 0);
9593
9594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9595 aarch64_set_reg_u64
9596 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9597 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9598 }
9599
9600 /* 64 bit ORR shifted register. */
9601 static void
9602 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9603 {
9604 unsigned rm = INSTR (20, 16);
9605 unsigned rn = INSTR (9, 5);
9606 unsigned rd = INSTR (4, 0);
9607
9608 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9609 aarch64_set_reg_u64
9610 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9611 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9612 }
9613
9614 /* 32 bit ORN shifted register. */
9615 static void
9616 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9617 {
9618 unsigned rm = INSTR (20, 16);
9619 unsigned rn = INSTR (9, 5);
9620 unsigned rd = INSTR (4, 0);
9621
9622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9623 aarch64_set_reg_u64
9624 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9625 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9626 }
9627
9628 /* 64 bit ORN shifted register. */
9629 static void
9630 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9631 {
9632 unsigned rm = INSTR (20, 16);
9633 unsigned rn = INSTR (9, 5);
9634 unsigned rd = INSTR (4, 0);
9635
9636 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9637 aarch64_set_reg_u64
9638 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9639 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9640 }
9641
9642 static void
9643 dexLogicalImmediate (sim_cpu *cpu)
9644 {
  /* assert instr[28,23] = 100100
9646 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9647 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9648 instr[22] = N : used to construct immediate mask
9649 instr[21,16] = immr
9650 instr[15,10] = imms
9651 instr[9,5] = Rn
9652 instr[4,0] = Rd */
9653
9654 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9655 uint32_t size = INSTR (31, 31);
9656 uint32_t N = INSTR (22, 22);
  /* uint32_t immr = INSTR (21, 16); */
  /* uint32_t imms = INSTR (15, 10); */
9659 uint32_t index = INSTR (22, 10);
9660 uint64_t bimm64 = LITable [index];
9661 uint32_t dispatch = INSTR (30, 29);
9662
9663 if (~size & N)
9664 HALT_UNALLOC;
9665
9666 if (!bimm64)
9667 HALT_UNALLOC;
9668
9669 if (size == 0)
9670 {
9671 uint32_t bimm = (uint32_t) bimm64;
9672
9673 switch (dispatch)
9674 {
9675 case 0: and32 (cpu, bimm); return;
9676 case 1: orr32 (cpu, bimm); return;
9677 case 2: eor32 (cpu, bimm); return;
9678 case 3: ands32 (cpu, bimm); return;
9679 }
9680 }
9681 else
9682 {
9683 switch (dispatch)
9684 {
9685 case 0: and64 (cpu, bimm64); return;
9686 case 1: orr64 (cpu, bimm64); return;
9687 case 2: eor64 (cpu, bimm64); return;
9688 case 3: ands64 (cpu, bimm64); return;
9689 }
9690 }
9691 HALT_UNALLOC;
9692 }
9693
/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in
   the dest register, i.e. it is in {0, 1} for 32 bit and
   {0, 1, 2, 3} for 64 bit.
   N.B. the register arg may not be SP so it should be
   accessed using the setGZRegisterXXX accessors.  */
9701
9702 /* 32 bit move 16 bit immediate zero remaining shorts. */
9703 static void
9704 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9705 {
9706 unsigned rd = INSTR (4, 0);
9707
9708 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9709 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9710 }
9711
9712 /* 64 bit move 16 bit immediate zero remaining shorts. */
9713 static void
9714 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9715 {
9716 unsigned rd = INSTR (4, 0);
9717
9718 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9719 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9720 }
9721
9722 /* 32 bit move 16 bit immediate negated. */
9723 static void
9724 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9725 {
9726 unsigned rd = INSTR (4, 0);
9727
9728 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9729 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9730 }
9731
9732 /* 64 bit move 16 bit immediate negated. */
9733 static void
9734 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9735 {
9736 unsigned rd = INSTR (4, 0);
9737
9738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9739 aarch64_set_reg_u64
9740 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9741 ^ 0xffffffffffffffffULL));
9742 }
9743
9744 /* 32 bit move 16 bit immediate keep remaining shorts. */
9745 static void
9746 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9747 {
9748 unsigned rd = INSTR (4, 0);
9749 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9750 uint32_t value = val << (pos * 16);
9751 uint32_t mask = ~(0xffffU << (pos * 16));
9752
9753 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9754 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9755 }
9756
/* 64 bit move 16 bit immediate keep remaining shorts.  */
9758 static void
9759 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9760 {
9761 unsigned rd = INSTR (4, 0);
9762 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9763 uint64_t value = (uint64_t) val << (pos * 16);
9764 uint64_t mask = ~(0xffffULL << (pos * 16));
9765
9766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9767 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9768 }
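
/* Worked example: a constant such as 0x1234567890AB is materialised by
     movz x0, #0x90AB             ; movz64 (cpu, 0x90AB, 0)
     movk x0, #0x5678, lsl #16    ; movk64 (cpu, 0x5678, 1)
     movk x0, #0x1234, lsl #32    ; movk64 (cpu, 0x1234, 2)
   with each movk inserting one 16 bit chunk and keeping the rest.  */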
9769
9770 static void
9771 dexMoveWideImmediate (sim_cpu *cpu)
9772 {
9773 /* assert instr[28:23] = 100101
9774 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9775 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9776 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9777 instr[20,5] = uimm16
9778 instr[4,0] = Rd */
9779
  /* N.B. the (multiple of 16) shift is applied by the called routine;
     we just pass the multiplier.  */
9782
9783 uint32_t imm;
9784 uint32_t size = INSTR (31, 31);
9785 uint32_t op = INSTR (30, 29);
9786 uint32_t shift = INSTR (22, 21);
9787
  /* 32 bit can only shift 0 or 1 lot of 16.
     Anything else is an unallocated instruction.  */
9790 if (size == 0 && (shift > 1))
9791 HALT_UNALLOC;
9792
9793 if (op == 1)
9794 HALT_UNALLOC;
9795
9796 imm = INSTR (20, 5);
9797
9798 if (size == 0)
9799 {
9800 if (op == 0)
9801 movn32 (cpu, imm, shift);
9802 else if (op == 2)
9803 movz32 (cpu, imm, shift);
9804 else
9805 movk32 (cpu, imm, shift);
9806 }
9807 else
9808 {
9809 if (op == 0)
9810 movn64 (cpu, imm, shift);
9811 else if (op == 2)
9812 movz64 (cpu, imm, shift);
9813 else
9814 movk64 (cpu, imm, shift);
9815 }
9816 }
9817
9818 /* Bitfield operations.
9819 These take a pair of bit positions r and s which are in {0..31}
9820 or {0..63} depending on the instruction word size.
9821 N.B register args may not be SP. */
9822
/* OK, we start with ubfm which just needs to pick
   some bits out of source, zero the rest and write
   the result to dest.  Just need two logical shifts.  */
9826
9827 /* 32 bit bitfield move, left and right of affected zeroed
9828 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9829 static void
9830 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9831 {
9832 unsigned rd;
9833 unsigned rn = INSTR (9, 5);
9834 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9835
9836 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9837 if (r <= s)
9838 {
9839 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9840 We want only bits s:xxx:r at the bottom of the word
9841 so we LSL bit s up to bit 31 i.e. by 31 - s
9842 and then we LSR to bring bit 31 down to bit s - r
9843 i.e. by 31 + r - s. */
9844 value <<= 31 - s;
9845 value >>= 31 + r - s;
9846 }
9847 else
9848 {
9849 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
	 We want only bits s:xxx:0 starting at bit 31-(r-1)
9851 so we LSL bit s up to bit 31 i.e. by 31 - s
9852 and then we LSL to bring bit 31 down to 31-(r-1)+s
9853 i.e. by r - (s + 1). */
9854 value <<= 31 - s;
9855 value >>= r - (s + 1);
9856 }
9857
9858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9859 rd = INSTR (4, 0);
9860 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9861 }
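
/* Worked example: UBFX w0, w1, #8, #8 is UBFM with r == 8 and s == 15.
   Since r <= s the code above computes (value << 16) >> 24, leaving
   bits 15:8 of Wn in bits 7:0 of Wd and zeroes everywhere else.  */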
9862
9863 /* 64 bit bitfield move, left and right of affected zeroed
9864 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9865 static void
9866 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9867 {
9868 unsigned rd;
9869 unsigned rn = INSTR (9, 5);
9870 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9871
9872 if (r <= s)
9873 {
9874 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9875 We want only bits s:xxx:r at the bottom of the word.
9876 So we LSL bit s up to bit 63 i.e. by 63 - s
9877 and then we LSR to bring bit 63 down to bit s - r
9878 i.e. by 63 + r - s. */
9879 value <<= 63 - s;
9880 value >>= 63 + r - s;
9881 }
9882 else
9883 {
9884 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
	 We want only bits s:xxx:0 starting at bit 63-(r-1).
9886 So we LSL bit s up to bit 63 i.e. by 63 - s
9887 and then we LSL to bring bit 63 down to 63-(r-1)+s
9888 i.e. by r - (s + 1). */
9889 value <<= 63 - s;
9890 value >>= r - (s + 1);
9891 }
9892
9893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9894 rd = INSTR (4, 0);
9895 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9896 }
9897
/* The signed versions need to insert sign bits
   on the left of the inserted bit field, so we do
   much the same as the unsigned version except we
   use an arithmetic shift right -- this just means
   we need to operate on signed values.  */
9903
9904 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9905 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9906 static void
9907 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9908 {
9909 unsigned rd;
9910 unsigned rn = INSTR (9, 5);
  /* As per ubfm32, but using an ASR instead of an LSR.  */
9912 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9913
9914 if (r <= s)
9915 {
9916 value <<= 31 - s;
9917 value >>= 31 + r - s;
9918 }
9919 else
9920 {
9921 value <<= 31 - s;
9922 value >>= r - (s + 1);
9923 }
9924
9925 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9926 rd = INSTR (4, 0);
9927 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9928 }
9929
9930 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9931 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9932 static void
9933 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9934 {
9935 unsigned rd;
9936 unsigned rn = INSTR (9, 5);
  /* As per ubfm, but using an ASR instead of an LSR.  */
9938 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9939
9940 if (r <= s)
9941 {
9942 value <<= 63 - s;
9943 value >>= 63 + r - s;
9944 }
9945 else
9946 {
9947 value <<= 63 - s;
9948 value >>= r - (s + 1);
9949 }
9950
9951 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9952 rd = INSTR (4, 0);
9953 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9954 }
9955
/* Finally, these versions leave non-affected bits
   as is, so we need to generate the bits as per
   ubfm and also generate a mask to pick the
   bits from the original and computed values.  */
9960
9961 /* 32 bit bitfield move, non-affected bits left as is.
9962 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9963 static void
9964 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9965 {
9966 unsigned rn = INSTR (9, 5);
9967 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9968 uint32_t mask = -1;
9969 unsigned rd;
9970 uint32_t value2;
9971
9972 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9973 if (r <= s)
9974 {
9975 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9976 We want only bits s:xxx:r at the bottom of the word
9977 so we LSL bit s up to bit 31 i.e. by 31 - s
9978 and then we LSR to bring bit 31 down to bit s - r
9979 i.e. by 31 + r - s. */
9980 value <<= 31 - s;
9981 value >>= 31 + r - s;
      /* The mask must include the same bits.  */
9983 mask <<= 31 - s;
9984 mask >>= 31 + r - s;
9985 }
9986 else
9987 {
9988 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
	 We want only bits s:xxx:0 starting at bit 31-(r-1)
9990 so we LSL bit s up to bit 31 i.e. by 31 - s
9991 and then we LSL to bring bit 31 down to 31-(r-1)+s
9992 i.e. by r - (s + 1). */
9993 value <<= 31 - s;
9994 value >>= r - (s + 1);
9995 /* The mask must include the same bits. */
9996 mask <<= 31 - s;
9997 mask >>= r - (s + 1);
9998 }
9999
10000 rd = INSTR (4, 0);
10001 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10002
10003 value2 &= ~mask;
10004 value2 |= value;
10005
10006 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10009 }
10010
10011 /* 64 bit bitfield move, non-affected bits left as is.
10012 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10013 static void
10014 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10015 {
10016 unsigned rd;
10017 unsigned rn = INSTR (9, 5);
10018 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10019 uint64_t mask = 0xffffffffffffffffULL;
10020
10021 if (r <= s)
10022 {
10023 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10024 We want only bits s:xxx:r at the bottom of the word
10025 so we LSL bit s up to bit 63 i.e. by 63 - s
10026 and then we LSR to bring bit 63 down to bit s - r
10027 i.e. by 63 + r - s. */
10028 value <<= 63 - s;
10029 value >>= 63 + r - s;
10030 /* The mask must include the same bits. */
10031 mask <<= 63 - s;
10032 mask >>= 63 + r - s;
10033 }
10034 else
10035 {
10036 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
	 We want only bits s:xxx:0 starting at bit 63-(r-1)
10038 so we LSL bit s up to bit 63 i.e. by 63 - s
10039 and then we LSL to bring bit 63 down to 63-(r-1)+s
10040 i.e. by r - (s + 1). */
10041 value <<= 63 - s;
10042 value >>= r - (s + 1);
10043 /* The mask must include the same bits. */
10044 mask <<= 63 - s;
10045 mask >>= r - (s + 1);
10046 }
10047
10048 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10049 rd = INSTR (4, 0);
10050 aarch64_set_reg_u64
10051 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10052 }
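
/* Worked example: BFI x0, x1, #16, #8 is BFM with r == 48 and s == 7.
   Since r > s the code above moves bits 7:0 of Xn up to bits 23:16 and
   builds a mask covering just those bits, so only that field of Xd is
   overwritten.  */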
10053
10054 static void
10055 dexBitfieldImmediate (sim_cpu *cpu)
10056 {
10057 /* assert instr[28:23] = 100110
10058 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10059 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10060 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10061 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10062 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10063 instr[9,5] = Rn
10064 instr[4,0] = Rd */
10065
10066 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10067 uint32_t dispatch;
10068 uint32_t imms;
10069 uint32_t size = INSTR (31, 31);
10070 uint32_t N = INSTR (22, 22);
  /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
     or else we have an UNALLOC.  */
10073 uint32_t immr = INSTR (21, 16);
10074
10075 if (~size & N)
10076 HALT_UNALLOC;
10077
10078 if (!size && uimm (immr, 5, 5))
10079 HALT_UNALLOC;
10080
10081 imms = INSTR (15, 10);
10082 if (!size && uimm (imms, 5, 5))
10083 HALT_UNALLOC;
10084
10085 /* Switch on combined size and op. */
10086 dispatch = INSTR (31, 29);
10087 switch (dispatch)
10088 {
10089 case 0: sbfm32 (cpu, immr, imms); return;
10090 case 1: bfm32 (cpu, immr, imms); return;
10091 case 2: ubfm32 (cpu, immr, imms); return;
10092 case 4: sbfm (cpu, immr, imms); return;
10093 case 5: bfm (cpu, immr, imms); return;
10094 case 6: ubfm (cpu, immr, imms); return;
10095 default: HALT_UNALLOC;
10096 }
10097 }
10098
10099 static void
10100 do_EXTR_32 (sim_cpu *cpu)
10101 {
10102 /* instr[31:21] = 00010011100
10103 instr[20,16] = Rm
10104 instr[15,10] = imms : 0xxxxx for 32 bit
10105 instr[9,5] = Rn
10106 instr[4,0] = Rd */
10107 unsigned rm = INSTR (20, 16);
10108 unsigned imms = INSTR (15, 10) & 31;
10109 unsigned rn = INSTR ( 9, 5);
10110 unsigned rd = INSTR ( 4, 0);
10111 uint64_t val1;
10112 uint64_t val2;
10113
10114 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10115 val1 >>= imms;
10116 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10117 val2 <<= (32 - imms);
10118
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* A 32 bit EXTR must leave the upper half of Xd clear.  */
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10121 }
10122
10123 static void
10124 do_EXTR_64 (sim_cpu *cpu)
10125 {
10126 /* instr[31:21] = 10010011100
10127 instr[20,16] = Rm
10128 instr[15,10] = imms
10129 instr[9,5] = Rn
10130 instr[4,0] = Rd */
10131 unsigned rm = INSTR (20, 16);
10132 unsigned imms = INSTR (15, 10) & 63;
10133 unsigned rn = INSTR ( 9, 5);
10134 unsigned rd = INSTR ( 4, 0);
10135 uint64_t val;
10136
  val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
  val >>= imms;
  /* Guard against the undefined shift by 64 when imms is zero; in that
     case the result is simply Rm.  */
  if (imms != 0)
    val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10142 }
10143
10144 static void
10145 dexExtractImmediate (sim_cpu *cpu)
10146 {
10147 /* assert instr[28:23] = 100111
10148 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10149 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10150 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10151 instr[21] = op0 : must be 0 or UNALLOC
10152 instr[20,16] = Rm
10153 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10154 instr[9,5] = Rn
10155 instr[4,0] = Rd */
10156
10157 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10158 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10159 uint32_t dispatch;
10160 uint32_t size = INSTR (31, 31);
10161 uint32_t N = INSTR (22, 22);
10162 /* 32 bit operations must have imms[5] = 0
10163 or else we have an UNALLOC. */
10164 uint32_t imms = INSTR (15, 10);
10165
10166 if (size ^ N)
10167 HALT_UNALLOC;
10168
10169 if (!size && uimm (imms, 5, 5))
10170 HALT_UNALLOC;
10171
10172 /* Switch on combined size and op. */
10173 dispatch = INSTR (31, 29);
10174
10175 if (dispatch == 0)
10176 do_EXTR_32 (cpu);
10177
10178 else if (dispatch == 4)
10179 do_EXTR_64 (cpu);
10180
10181 else if (dispatch == 1)
10182 HALT_NYI;
10183 else
10184 HALT_UNALLOC;
10185 }
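
/* Worked example: ROR x0, x1, #8 assembles as EXTR x0, x1, x1, #8;
   with Rm == Rn the concatenation in do_EXTR_64 reduces to a rotate:
   (Xn >> 8) | (Xn << 56).  */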
10186
10187 static void
10188 dexDPImm (sim_cpu *cpu)
10189 {
10190 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
     assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10192 bits [25,23] of a DPImm are the secondary dispatch vector. */
10193 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10194
10195 switch (group2)
10196 {
10197 case DPIMM_PCADR_000:
10198 case DPIMM_PCADR_001:
10199 dexPCRelAddressing (cpu);
10200 return;
10201
10202 case DPIMM_ADDSUB_010:
10203 case DPIMM_ADDSUB_011:
10204 dexAddSubtractImmediate (cpu);
10205 return;
10206
10207 case DPIMM_LOG_100:
10208 dexLogicalImmediate (cpu);
10209 return;
10210
10211 case DPIMM_MOV_101:
10212 dexMoveWideImmediate (cpu);
10213 return;
10214
10215 case DPIMM_BITF_110:
10216 dexBitfieldImmediate (cpu);
10217 return;
10218
10219 case DPIMM_EXTR_111:
10220 dexExtractImmediate (cpu);
10221 return;
10222
10223 default:
10224 /* Should never reach here. */
10225 HALT_NYI;
10226 }
10227 }
10228
10229 static void
10230 dexLoadUnscaledImmediate (sim_cpu *cpu)
10231 {
10232 /* instr[29,24] == 111_00
10233 instr[21] == 0
10234 instr[11,10] == 00
10235 instr[31,30] = size
10236 instr[26] = V
10237 instr[23,22] = opc
10238 instr[20,12] = simm9
10239 instr[9,5] = rn may be SP. */
10240 /* unsigned rt = INSTR (4, 0); */
10241 uint32_t V = INSTR (26, 26);
10242 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10243 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10244
10245 if (!V)
10246 {
10247 /* GReg operations. */
10248 switch (dispatch)
10249 {
10250 case 0: sturb (cpu, imm); return;
10251 case 1: ldurb32 (cpu, imm); return;
10252 case 2: ldursb64 (cpu, imm); return;
10253 case 3: ldursb32 (cpu, imm); return;
10254 case 4: sturh (cpu, imm); return;
10255 case 5: ldurh32 (cpu, imm); return;
10256 case 6: ldursh64 (cpu, imm); return;
10257 case 7: ldursh32 (cpu, imm); return;
10258 case 8: stur32 (cpu, imm); return;
10259 case 9: ldur32 (cpu, imm); return;
10260 case 10: ldursw (cpu, imm); return;
10261 case 12: stur64 (cpu, imm); return;
10262 case 13: ldur64 (cpu, imm); return;
10263
10264 case 14:
10265 /* PRFUM NYI. */
10266 HALT_NYI;
10267
10268 default:
10269 case 11:
10270 case 15:
10271 HALT_UNALLOC;
10272 }
10273 }
10274
10275 /* FReg operations. */
10276 switch (dispatch)
10277 {
10278 case 2: fsturq (cpu, imm); return;
10279 case 3: fldurq (cpu, imm); return;
10280 case 8: fsturs (cpu, imm); return;
10281 case 9: fldurs (cpu, imm); return;
10282 case 12: fsturd (cpu, imm); return;
10283 case 13: fldurd (cpu, imm); return;
10284
10285 case 0: /* STUR 8 bit FP. */
10286 case 1: /* LDUR 8 bit FP. */
10287 case 4: /* STUR 16 bit FP. */
    case 5: /* LDUR 16 bit FP.  */
10289 HALT_NYI;
10290
10291 default:
10292 case 6:
10293 case 7:
10294 case 10:
10295 case 11:
10296 case 14:
10297 case 15:
10298 HALT_UNALLOC;
10299 }
10300 }
10301
/* N.B. A preliminary note regarding all the ldrs<x>32
   instructions

   The signed value loaded by these instructions is cast to unsigned
   before being passed to aarch64_set_reg_u64 (cpu, N, v), i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign extension
   (as required) but avoids 64 bit sign extension, thus ensuring that the
   top half of the register word is zero.  This is what the spec demands
   when a 32 bit load occurs.  */
10311
10312 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10313 static void
10314 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10315 {
10316 unsigned int rn = INSTR (9, 5);
10317 unsigned int rt = INSTR (4, 0);
10318
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  /* Cast via uint32_t so that the top half of the register is zeroed,
     as the preliminary note above requires.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       (uint32_t) aarch64_get_mem_s8 (cpu, address));
10324 }
10325
10326 /* 32 bit load sign-extended byte scaled or unscaled zero-
10327 or sign-extended 32-bit register offset. */
10328 static void
10329 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10330 {
10331 unsigned int rm = INSTR (20, 16);
10332 unsigned int rn = INSTR (9, 5);
10333 unsigned int rt = INSTR (4, 0);
10334
10335 /* rn may reference SP, rm and rt must reference ZR. */
10336
10337 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10338 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10339 extension);
10340
10341 /* There is no scaling required for a byte load. */
  aarch64_set_reg_u64
    (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
						    + displacement));
10345 }
10346
10347 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10348 pre- or post-writeback. */
10349 static void
10350 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10351 {
10352 uint64_t address;
10353 unsigned int rn = INSTR (9, 5);
10354 unsigned int rt = INSTR (4, 0);
10355
10356 if (rn == rt && wb != NoWriteBack)
10357 HALT_UNALLOC;
10358
10359 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10360
10361 if (wb == Pre)
10362 address += offset;
10363
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       (uint32_t) aarch64_get_mem_s8 (cpu, address));
10366
10367 if (wb == Post)
10368 address += offset;
10369
10370 if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10372 }
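
/* Worked example of the writeback forms handled above:
   ldrsb w0, [x1, #-1]! is the Pre form (the offset is applied before
   the access and the updated address is written back), while
   ldrsb w0, [x1], #-1 is the Post form (the original address is used
   for the access and then updated).  */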
10373
10374 /* 8 bit store scaled. */
10375 static void
10376 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10377 {
10378 unsigned st = INSTR (4, 0);
10379 unsigned rn = INSTR (9, 5);
10380
10381 aarch64_set_mem_u8 (cpu,
10382 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10383 aarch64_get_vec_u8 (cpu, st, 0));
10384 }
10385
10386 /* 8 bit store scaled or unscaled zero- or
10387 sign-extended 8-bit register offset. */
10388 static void
10389 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10390 {
10391 unsigned rm = INSTR (20, 16);
10392 unsigned rn = INSTR (9, 5);
10393 unsigned st = INSTR (4, 0);
10394
10395 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10396 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10397 extension);
  /* There is no scaling required for a byte store.  */
  uint64_t displacement = extended;
10399
10400 aarch64_set_mem_u8
10401 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10402 }
10403
10404 /* 16 bit store scaled. */
10405 static void
10406 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10407 {
10408 unsigned st = INSTR (4, 0);
10409 unsigned rn = INSTR (9, 5);
10410
10411 aarch64_set_mem_u16
10412 (cpu,
10413 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10414 aarch64_get_vec_u16 (cpu, st, 0));
10415 }
10416
/* 16 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
10419 static void
10420 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10421 {
10422 unsigned rm = INSTR (20, 16);
10423 unsigned rn = INSTR (9, 5);
10424 unsigned st = INSTR (4, 0);
10425
10426 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10427 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10428 extension);
10429 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10430
10431 aarch64_set_mem_u16
10432 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10433 }
10434
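/* A worked example of the displacement arithmetic above, assuming
   OPT_SCALE shifts its operand left by log2 of the access size in
   bytes when scaling is enabled and leaves it unchanged otherwise:

     STR h0, [x1, x2, LSL #1]  with x2 == 10
       => scaling == Scaled, displacement == 10 << 1 == 20
     STR h0, [x1, x2]          with x2 == 10
       => scaling == Unscaled, displacement == 10.  */
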
10435 /* 32 bit store scaled unsigned 12 bit. */
10436 static void
10437 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10438 {
10439 unsigned st = INSTR (4, 0);
10440 unsigned rn = INSTR (9, 5);
10441
10442 aarch64_set_mem_u32
10443 (cpu,
10444 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10445 aarch64_get_vec_u32 (cpu, st, 0));
10446 }
10447
10448 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10449 static void
10450 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10451 {
10452 unsigned rn = INSTR (9, 5);
10453 unsigned st = INSTR (4, 0);
10454
10455 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10456
10457 if (wb != Post)
10458 address += offset;
10459
10460 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10461
10462 if (wb == Post)
10463 address += offset;
10464
10465 if (wb != NoWriteBack)
10466 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10467 }
10468
10469 /* 32 bit store scaled or unscaled zero-
10470 or sign-extended 32-bit register offset. */
10471 static void
10472 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10473 {
10474 unsigned rm = INSTR (20, 16);
10475 unsigned rn = INSTR (9, 5);
10476 unsigned st = INSTR (4, 0);
10477
10478 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10479 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10480 extension);
10481 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10482
10483 aarch64_set_mem_u32
10484 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10485 }
10486
10487 /* 64 bit store scaled unsigned 12 bit. */
10488 static void
10489 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10490 {
10491 unsigned st = INSTR (4, 0);
10492 unsigned rn = INSTR (9, 5);
10493
10494 aarch64_set_mem_u64
10495 (cpu,
10496 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10497 aarch64_get_vec_u64 (cpu, st, 0));
10498 }
10499
10500 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10501 static void
10502 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10503 {
10504 unsigned rn = INSTR (9, 5);
10505 unsigned st = INSTR (4, 0);
10506
10507 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10508
10509 if (wb != Post)
10510 address += offset;
10511
10512 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10513
10514 if (wb == Post)
10515 address += offset;
10516
10517 if (wb != NoWriteBack)
10518 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10519 }
10520
10521 /* 64 bit store scaled or unscaled zero-
10522 or sign-extended 32-bit register offset. */
10523 static void
10524 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10525 {
10526 unsigned rm = INSTR (20, 16);
10527 unsigned rn = INSTR (9, 5);
10528 unsigned st = INSTR (4, 0);
10529
10530 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10531 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10532 extension);
10533 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10534
10535 aarch64_set_mem_u64
10536 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10537 }
10538
10539 /* 128 bit store scaled unsigned 12 bit. */
10540 static void
10541 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10542 {
10543 FRegister a;
10544 unsigned st = INSTR (4, 0);
10545 unsigned rn = INSTR (9, 5);
10546 uint64_t addr;
10547
10548 aarch64_get_FP_long_double (cpu, st, & a);
10549
10550 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10551 aarch64_set_mem_long_double (cpu, addr, a);
10552 }
10553
10554 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10555 static void
10556 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10557 {
10558 FRegister a;
10559 unsigned rn = INSTR (9, 5);
10560 unsigned st = INSTR (4, 0);
10561 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10562
10563 if (wb != Post)
10564 address += offset;
10565
10566 aarch64_get_FP_long_double (cpu, st, & a);
10567 aarch64_set_mem_long_double (cpu, address, a);
10568
10569 if (wb == Post)
10570 address += offset;
10571
10572 if (wb != NoWriteBack)
10573 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10574 }
10575
10576 /* 128 bit store scaled or unscaled zero-
10577 or sign-extended 32-bit register offset. */
10578 static void
10579 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10580 {
10581 unsigned rm = INSTR (20, 16);
10582 unsigned rn = INSTR (9, 5);
10583 unsigned st = INSTR (4, 0);
10584
10585 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10586 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10587 extension);
10588 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10589
10590 FRegister a;
10591
10592 aarch64_get_FP_long_double (cpu, st, & a);
10593 aarch64_set_mem_long_double (cpu, address + displacement, a);
10594 }
10595
10596 static void
10597 dexLoadImmediatePrePost (sim_cpu *cpu)
10598 {
10599 /* instr[31,30] = size
10600 instr[29,27] = 111
10601 instr[26] = V
10602 instr[25,24] = 00
10603 instr[23,22] = opc
10604 instr[21] = 0
10605 instr[20,12] = simm9
10606 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10607 instr[10] = 0
10608 instr[9,5] = Rn may be SP.
10609 instr[4,0] = Rt */
10610
10611 uint32_t V = INSTR (26, 26);
10612 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10613 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10614 WriteBack wb = INSTR (11, 11);
10615
10616 if (!V)
10617 {
10618 /* GReg operations. */
10619 switch (dispatch)
10620 {
10621 case 0: strb_wb (cpu, imm, wb); return;
10622 case 1: ldrb32_wb (cpu, imm, wb); return;
10623 case 2: ldrsb_wb (cpu, imm, wb); return;
10624 case 3: ldrsb32_wb (cpu, imm, wb); return;
10625 case 4: strh_wb (cpu, imm, wb); return;
10626 case 5: ldrh32_wb (cpu, imm, wb); return;
10627 case 6: ldrsh64_wb (cpu, imm, wb); return;
10628 case 7: ldrsh32_wb (cpu, imm, wb); return;
10629 case 8: str32_wb (cpu, imm, wb); return;
10630 case 9: ldr32_wb (cpu, imm, wb); return;
10631 case 10: ldrsw_wb (cpu, imm, wb); return;
10632 case 12: str_wb (cpu, imm, wb); return;
10633 case 13: ldr_wb (cpu, imm, wb); return;
10634
10635 default:
10636 case 11:
10637 case 14:
10638 case 15:
10639 HALT_UNALLOC;
10640 }
10641 }
10642
10643 /* FReg operations. */
10644 switch (dispatch)
10645 {
10646 case 2: fstrq_wb (cpu, imm, wb); return;
10647 case 3: fldrq_wb (cpu, imm, wb); return;
10648 case 8: fstrs_wb (cpu, imm, wb); return;
10649 case 9: fldrs_wb (cpu, imm, wb); return;
10650 case 12: fstrd_wb (cpu, imm, wb); return;
10651 case 13: fldrd_wb (cpu, imm, wb); return;
10652
    case 0: /* STR 8 bit FP, writeback.  */
    case 1: /* LDR 8 bit FP, writeback.  */
    case 4: /* STR 16 bit FP, writeback.  */
    case 5: /* LDR 16 bit FP, writeback.  */
10657 HALT_NYI;
10658
10659 default:
10660 case 6:
10661 case 7:
10662 case 10:
10663 case 11:
10664 case 14:
10665 case 15:
10666 HALT_UNALLOC;
10667 }
10668 }
10669
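/* A worked example of the dispatch value computed above: a 32-bit LDR
   with post-index writeback has size == 10 and opc == 01, so
   dispatch == (2 << 2) | 1 == 9, selecting ldr32_wb.  */
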
10670 static void
10671 dexLoadRegisterOffset (sim_cpu *cpu)
10672 {
10673 /* instr[31,30] = size
10674 instr[29,27] = 111
10675 instr[26] = V
10676 instr[25,24] = 00
10677 instr[23,22] = opc
10678 instr[21] = 1
10679 instr[20,16] = rm
10680 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10681 110 ==> SXTW, 111 ==> SXTX,
10682 ow ==> RESERVED
10683 instr[12] = scaled
10684 instr[11,10] = 10
10685 instr[9,5] = rn
10686 instr[4,0] = rt. */
10687
10688 uint32_t V = INSTR (26, 26);
10689 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10690 Scaling scale = INSTR (12, 12);
10691 Extension extensionType = INSTR (15, 13);
10692
10693 /* Check for illegal extension types. */
10694 if (uimm (extensionType, 1, 1) == 0)
10695 HALT_UNALLOC;
10696
10697 if (extensionType == UXTX || extensionType == SXTX)
10698 extensionType = NoExtension;
10699
10700 if (!V)
10701 {
10702 /* GReg operations. */
10703 switch (dispatch)
10704 {
10705 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10706 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10707 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10708 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10709 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10710 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10711 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10712 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10713 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10714 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10715 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10716 case 12: str_scale_ext (cpu, scale, extensionType); return;
10717 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10718 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10719
10720 default:
10721 case 11:
10722 case 15:
10723 HALT_UNALLOC;
10724 }
10725 }
10726
10727 /* FReg operations. */
10728 switch (dispatch)
10729 {
    case 1: /* LDR 8 bit FP register offset.  */
      HALT_NYI;
    case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
    case 5: /* LDR 16 bit FP register offset.  */
      HALT_NYI;
10735 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10736 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10737
10738 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10739 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10740 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10741 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10742 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10743
10744 default:
10745 case 6:
10746 case 7:
10747 case 10:
10748 case 11:
10749 case 14:
10750 case 15:
10751 HALT_UNALLOC;
10752 }
10753 }
10754
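/* A sketch of how the register offset is formed here, assuming
   extend () returns its argument zero extended for UXTW or
   NoExtension and sign extended for SXTW.  For LDR w0, [x1, w2, SXTW]
   with w2 == 0xfffffffc:

     int64_t  off = extend (0xfffffffc, SXTW);             .. == -4
     uint64_t ea  = base + OPT_SCALE (off, 32, Unscaled);  .. base - 4  */
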
10755 static void
10756 dexLoadUnsignedImmediate (sim_cpu *cpu)
10757 {
10758 /* instr[29,24] == 111_01
10759 instr[31,30] = size
10760 instr[26] = V
10761 instr[23,22] = opc
10762 instr[21,10] = uimm12 : unsigned immediate offset
10763 instr[9,5] = rn may be SP.
10764 instr[4,0] = rt. */
10765
10766 uint32_t V = INSTR (26,26);
10767 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10768 uint32_t imm = INSTR (21, 10);
10769
10770 if (!V)
10771 {
10772 /* GReg operations. */
10773 switch (dispatch)
10774 {
10775 case 0: strb_abs (cpu, imm); return;
10776 case 1: ldrb32_abs (cpu, imm); return;
10777 case 2: ldrsb_abs (cpu, imm); return;
10778 case 3: ldrsb32_abs (cpu, imm); return;
10779 case 4: strh_abs (cpu, imm); return;
10780 case 5: ldrh32_abs (cpu, imm); return;
10781 case 6: ldrsh_abs (cpu, imm); return;
10782 case 7: ldrsh32_abs (cpu, imm); return;
10783 case 8: str32_abs (cpu, imm); return;
10784 case 9: ldr32_abs (cpu, imm); return;
10785 case 10: ldrsw_abs (cpu, imm); return;
10786 case 12: str_abs (cpu, imm); return;
10787 case 13: ldr_abs (cpu, imm); return;
10788 case 14: prfm_abs (cpu, imm); return;
10789
10790 default:
10791 case 11:
10792 case 15:
10793 HALT_UNALLOC;
10794 }
10795 }
10796
10797 /* FReg operations. */
10798 switch (dispatch)
10799 {
10800 case 0: fstrb_abs (cpu, imm); return;
10801 case 4: fstrh_abs (cpu, imm); return;
10802 case 8: fstrs_abs (cpu, imm); return;
10803 case 12: fstrd_abs (cpu, imm); return;
10804 case 2: fstrq_abs (cpu, imm); return;
10805
10806 case 1: fldrb_abs (cpu, imm); return;
10807 case 5: fldrh_abs (cpu, imm); return;
10808 case 9: fldrs_abs (cpu, imm); return;
10809 case 13: fldrd_abs (cpu, imm); return;
10810 case 3: fldrq_abs (cpu, imm); return;
10811
10812 default:
10813 case 6:
10814 case 7:
10815 case 10:
10816 case 11:
10817 case 14:
10818 case 15:
10819 HALT_UNALLOC;
10820 }
10821 }
10822
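/* Worked example: LDR x0, [x1, #16] encodes uimm12 == 2; ldr_abs
   recovers the byte offset by scaling it by the access size
   (assuming SCALE shifts left by log2 of the byte width:
   2 << 3 == 16).  */
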
10823 static void
10824 dexLoadExclusive (sim_cpu *cpu)
10825 {
10826 /* assert instr[29:24] = 001000;
10827 instr[31,30] = size
10828 instr[23] = 0 if exclusive
10829 instr[22] = L : 1 if load, 0 if store
10830 instr[21] = 1 if pair
10831 instr[20,16] = Rs
10832 instr[15] = o0 : 1 if ordered
10833 instr[14,10] = Rt2
10834 instr[9,5] = Rn
     instr[4,0] = Rt.  */
10836
10837 switch (INSTR (22, 21))
10838 {
10839 case 2: ldxr (cpu); return;
10840 case 0: stxr (cpu); return;
10841 default: HALT_NYI;
10842 }
10843 }
10844
10845 static void
10846 dexLoadOther (sim_cpu *cpu)
10847 {
10848 uint32_t dispatch;
10849
  /* instr[29,25] = 111_0
     instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
     instr[21] and instr[11,10] form the secondary dispatch vector.  */
10853 if (INSTR (24, 24))
10854 {
10855 dexLoadUnsignedImmediate (cpu);
10856 return;
10857 }
10858
10859 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10860 switch (dispatch)
10861 {
10862 case 0: dexLoadUnscaledImmediate (cpu); return;
10863 case 1: dexLoadImmediatePrePost (cpu); return;
10864 case 3: dexLoadImmediatePrePost (cpu); return;
10865 case 6: dexLoadRegisterOffset (cpu); return;
10866
10867 default:
10868 case 2:
10869 case 4:
10870 case 5:
10871 case 7:
10872 HALT_NYI;
10873 }
10874 }
10875
10876 static void
10877 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10878 {
10879 unsigned rn = INSTR (14, 10);
10880 unsigned rd = INSTR (9, 5);
10881 unsigned rm = INSTR (4, 0);
10882 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10883
10884 if ((rn == rd || rm == rd) && wb != NoWriteBack)
    /* Storing the base register of a writeback form is UNPREDICTABLE;
       treat it as unallocated.  */
    HALT_UNALLOC;
10886
10887 offset <<= 2;
10888
10889 if (wb != Post)
10890 address += offset;
10891
10892 aarch64_set_mem_u32 (cpu, address,
10893 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10894 aarch64_set_mem_u32 (cpu, address + 4,
10895 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10896
10897 if (wb == Post)
10898 address += offset;
10899
10900 if (wb != NoWriteBack)
10901 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10902 }
10903
10904 static void
10905 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10906 {
10907 unsigned rn = INSTR (14, 10);
10908 unsigned rd = INSTR (9, 5);
10909 unsigned rm = INSTR (4, 0);
10910 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10911
10912 if ((rn == rd || rm == rd) && wb != NoWriteBack)
    /* Storing the base register of a writeback form is UNPREDICTABLE;
       treat it as unallocated.  */
    HALT_UNALLOC;
10914
10915 offset <<= 3;
10916
10917 if (wb != Post)
10918 address += offset;
10919
10920 aarch64_set_mem_u64 (cpu, address,
10921 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10922 aarch64_set_mem_u64 (cpu, address + 8,
10923 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10924
10925 if (wb == Post)
10926 address += offset;
10927
10928 if (wb != NoWriteBack)
10929 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10930 }
10931
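/* Worked example: STP x0, x1, [sp, #-16]! encodes simm7 == -2; the
   handler above shifts it by 3 (offset <<= 3) to obtain the byte
   offset -16, adds it before the two stores (Pre), then writes the
   updated address back to SP.  */
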
10932 static void
10933 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10934 {
10935 unsigned rn = INSTR (14, 10);
10936 unsigned rd = INSTR (9, 5);
10937 unsigned rm = INSTR (4, 0);
10938 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10939
  /* LDP with Rt == Rt2 is UNPREDICTABLE; treat it as unallocated.  */
10941 if (rn == rm)
10942 HALT_UNALLOC;
10943
10944 offset <<= 2;
10945
10946 if (wb != Post)
10947 address += offset;
10948
10949 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10950 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10951
10952 if (wb == Post)
10953 address += offset;
10954
10955 if (wb != NoWriteBack)
10956 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10957 }
10958
10959 static void
10960 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10961 {
10962 unsigned rn = INSTR (14, 10);
10963 unsigned rd = INSTR (9, 5);
10964 unsigned rm = INSTR (4, 0);
10965 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10966
  /* LDP with Rt == Rt2 is UNPREDICTABLE; treat it as unallocated.  */
10968 if (rn == rm)
10969 HALT_UNALLOC;
10970
10971 offset <<= 2;
10972
10973 if (wb != Post)
10974 address += offset;
10975
10976 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10977 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10978
10979 if (wb == Post)
10980 address += offset;
10981
10982 if (wb != NoWriteBack)
10983 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10984 }
10985
10986 static void
10987 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10988 {
10989 unsigned rn = INSTR (14, 10);
10990 unsigned rd = INSTR (9, 5);
10991 unsigned rm = INSTR (4, 0);
10992 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10993
  /* LDP with Rt == Rt2 is UNPREDICTABLE; treat it as unallocated.  */
10995 if (rn == rm)
10996 HALT_UNALLOC;
10997
10998 offset <<= 3;
10999
11000 if (wb != Post)
11001 address += offset;
11002
11003 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11004 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11005
11006 if (wb == Post)
11007 address += offset;
11008
11009 if (wb != NoWriteBack)
11010 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11011 }
11012
11013 static void
11014 dex_load_store_pair_gr (sim_cpu *cpu)
11015 {
11016 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11017 instr[29,25] = instruction encoding: 101_0
11018 instr[26] = V : 1 if fp 0 if gp
11019 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11020 instr[22] = load/store (1=> load)
11021 instr[21,15] = signed, scaled, offset
11022 instr[14,10] = Rn
11023 instr[ 9, 5] = Rd
11024 instr[ 4, 0] = Rm. */
11025
11026 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11027 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11028
11029 switch (dispatch)
11030 {
11031 case 2: store_pair_u32 (cpu, offset, Post); return;
11032 case 3: load_pair_u32 (cpu, offset, Post); return;
11033 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11034 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11035 case 6: store_pair_u32 (cpu, offset, Pre); return;
11036 case 7: load_pair_u32 (cpu, offset, Pre); return;
11037
11038 case 11: load_pair_s32 (cpu, offset, Post); return;
11039 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11040 case 15: load_pair_s32 (cpu, offset, Pre); return;
11041
11042 case 18: store_pair_u64 (cpu, offset, Post); return;
11043 case 19: load_pair_u64 (cpu, offset, Post); return;
11044 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11045 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11046 case 22: store_pair_u64 (cpu, offset, Pre); return;
11047 case 23: load_pair_u64 (cpu, offset, Pre); return;
11048
11049 default:
11050 HALT_UNALLOC;
11051 }
11052 }
11053
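/* Worked example: LDP x0, x1, [sp], #16 has size == 10, addressing
   mode 01 (post) and L == 1, so dispatch == (2 << 3) | 3 == 19,
   selecting load_pair_u64 with Post writeback.  */
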
11054 static void
11055 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11056 {
11057 unsigned rn = INSTR (14, 10);
11058 unsigned rd = INSTR (9, 5);
11059 unsigned rm = INSTR (4, 0);
11060 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11061
11062 offset <<= 2;
11063
11064 if (wb != Post)
11065 address += offset;
11066
11067 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11068 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11069
11070 if (wb == Post)
11071 address += offset;
11072
11073 if (wb != NoWriteBack)
11074 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11075 }
11076
11077 static void
11078 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11079 {
11080 unsigned rn = INSTR (14, 10);
11081 unsigned rd = INSTR (9, 5);
11082 unsigned rm = INSTR (4, 0);
11083 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11084
11085 offset <<= 3;
11086
11087 if (wb != Post)
11088 address += offset;
11089
11090 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11091 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11092
11093 if (wb == Post)
11094 address += offset;
11095
11096 if (wb != NoWriteBack)
11097 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11098 }
11099
11100 static void
11101 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11102 {
11103 FRegister a;
11104 unsigned rn = INSTR (14, 10);
11105 unsigned rd = INSTR (9, 5);
11106 unsigned rm = INSTR (4, 0);
11107 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11108
11109 offset <<= 4;
11110
11111 if (wb != Post)
11112 address += offset;
11113
11114 aarch64_get_FP_long_double (cpu, rm, & a);
11115 aarch64_set_mem_long_double (cpu, address, a);
11116 aarch64_get_FP_long_double (cpu, rn, & a);
11117 aarch64_set_mem_long_double (cpu, address + 16, a);
11118
11119 if (wb == Post)
11120 address += offset;
11121
11122 if (wb != NoWriteBack)
11123 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11124 }
11125
11126 static void
11127 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11128 {
11129 unsigned rn = INSTR (14, 10);
11130 unsigned rd = INSTR (9, 5);
11131 unsigned rm = INSTR (4, 0);
11132 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11133
11134 if (rm == rn)
11135 HALT_UNALLOC;
11136
11137 offset <<= 2;
11138
11139 if (wb != Post)
11140 address += offset;
11141
11142 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11143 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11144
11145 if (wb == Post)
11146 address += offset;
11147
11148 if (wb != NoWriteBack)
11149 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11150 }
11151
11152 static void
11153 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11154 {
11155 unsigned rn = INSTR (14, 10);
11156 unsigned rd = INSTR (9, 5);
11157 unsigned rm = INSTR (4, 0);
11158 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11159
11160 if (rm == rn)
11161 HALT_UNALLOC;
11162
11163 offset <<= 3;
11164
11165 if (wb != Post)
11166 address += offset;
11167
11168 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11169 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11170
11171 if (wb == Post)
11172 address += offset;
11173
11174 if (wb != NoWriteBack)
11175 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11176 }
11177
11178 static void
11179 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11180 {
11181 FRegister a;
11182 unsigned rn = INSTR (14, 10);
11183 unsigned rd = INSTR (9, 5);
11184 unsigned rm = INSTR (4, 0);
11185 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11186
11187 if (rm == rn)
11188 HALT_UNALLOC;
11189
11190 offset <<= 4;
11191
11192 if (wb != Post)
11193 address += offset;
11194
11195 aarch64_get_mem_long_double (cpu, address, & a);
11196 aarch64_set_FP_long_double (cpu, rm, a);
11197 aarch64_get_mem_long_double (cpu, address + 16, & a);
11198 aarch64_set_FP_long_double (cpu, rn, a);
11199
11200 if (wb == Post)
11201 address += offset;
11202
11203 if (wb != NoWriteBack)
11204 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11205 }
11206
11207 static void
11208 dex_load_store_pair_fp (sim_cpu *cpu)
11209 {
11210 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11211 instr[29,25] = instruction encoding
11212 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11213 instr[22] = load/store (1=> load)
11214 instr[21,15] = signed, scaled, offset
11215 instr[14,10] = Rn
11216 instr[ 9, 5] = Rd
11217 instr[ 4, 0] = Rm */
11218
11219 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11220 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11221
11222 switch (dispatch)
11223 {
11224 case 2: store_pair_float (cpu, offset, Post); return;
11225 case 3: load_pair_float (cpu, offset, Post); return;
11226 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11227 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11228 case 6: store_pair_float (cpu, offset, Pre); return;
11229 case 7: load_pair_float (cpu, offset, Pre); return;
11230
11231 case 10: store_pair_double (cpu, offset, Post); return;
11232 case 11: load_pair_double (cpu, offset, Post); return;
11233 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11234 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11235 case 14: store_pair_double (cpu, offset, Pre); return;
11236 case 15: load_pair_double (cpu, offset, Pre); return;
11237
11238 case 18: store_pair_long_double (cpu, offset, Post); return;
11239 case 19: load_pair_long_double (cpu, offset, Post); return;
11240 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11241 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11242 case 22: store_pair_long_double (cpu, offset, Pre); return;
11243 case 23: load_pair_long_double (cpu, offset, Pre); return;
11244
11245 default:
11246 HALT_UNALLOC;
11247 }
11248 }
11249
11250 static inline unsigned
11251 vec_reg (unsigned v, unsigned o)
11252 {
  /* Vector register numbers wrap within the 32 entry register file.  */
  return (v + o) & 0x1F;
11254 }
11255
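/* E.g. a structure load whose first destination is v31 continues at
   v0: vec_reg (31, 1) == 0.  */
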
11256 /* Load multiple N-element structures to N consecutive registers. */
11257 static void
11258 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11259 {
11260 int all = INSTR (30, 30);
11261 unsigned size = INSTR (11, 10);
11262 unsigned vd = INSTR (4, 0);
11263 unsigned i;
11264
11265 switch (size)
11266 {
11267 case 0: /* 8-bit operations. */
11268 if (all)
11269 for (i = 0; i < (16 * N); i++)
11270 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11271 aarch64_get_mem_u8 (cpu, address + i));
11272 else
11273 for (i = 0; i < (8 * N); i++)
11274 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11275 aarch64_get_mem_u8 (cpu, address + i));
11276 return;
11277
11278 case 1: /* 16-bit operations. */
11279 if (all)
11280 for (i = 0; i < (8 * N); i++)
11281 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11282 aarch64_get_mem_u16 (cpu, address + i * 2));
11283 else
11284 for (i = 0; i < (4 * N); i++)
11285 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11286 aarch64_get_mem_u16 (cpu, address + i * 2));
11287 return;
11288
11289 case 2: /* 32-bit operations. */
11290 if (all)
11291 for (i = 0; i < (4 * N); i++)
11292 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11293 aarch64_get_mem_u32 (cpu, address + i * 4));
11294 else
11295 for (i = 0; i < (2 * N); i++)
11296 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11297 aarch64_get_mem_u32 (cpu, address + i * 4));
11298 return;
11299
11300 case 3: /* 64-bit operations. */
11301 if (all)
11302 for (i = 0; i < (2 * N); i++)
11303 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11304 aarch64_get_mem_u64 (cpu, address + i * 8));
11305 else
11306 for (i = 0; i < N; i++)
11307 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11308 aarch64_get_mem_u64 (cpu, address + i * 8));
11309 return;
11310 }
11311 }
11312
11313 /* LD4: load multiple 4-element to four consecutive registers. */
11314 static void
11315 LD4 (sim_cpu *cpu, uint64_t address)
11316 {
11317 vec_load (cpu, address, 4);
11318 }
11319
11320 /* LD3: load multiple 3-element structures to three consecutive registers. */
11321 static void
11322 LD3 (sim_cpu *cpu, uint64_t address)
11323 {
11324 vec_load (cpu, address, 3);
11325 }
11326
11327 /* LD2: load multiple 2-element structures to two consecutive registers. */
11328 static void
11329 LD2 (sim_cpu *cpu, uint64_t address)
11330 {
11331 vec_load (cpu, address, 2);
11332 }
11333
11334 /* Load multiple 1-element structures into one register. */
11335 static void
11336 LD1_1 (sim_cpu *cpu, uint64_t address)
11337 {
11338 int all = INSTR (30, 30);
11339 unsigned size = INSTR (11, 10);
11340 unsigned vd = INSTR (4, 0);
11341 unsigned i;
11342
11343 switch (size)
11344 {
11345 case 0:
11346 /* LD1 {Vd.16b}, addr, #16 */
11347 /* LD1 {Vd.8b}, addr, #8 */
11348 for (i = 0; i < (all ? 16 : 8); i++)
11349 aarch64_set_vec_u8 (cpu, vd, i,
11350 aarch64_get_mem_u8 (cpu, address + i));
11351 return;
11352
11353 case 1:
11354 /* LD1 {Vd.8h}, addr, #16 */
11355 /* LD1 {Vd.4h}, addr, #8 */
11356 for (i = 0; i < (all ? 8 : 4); i++)
11357 aarch64_set_vec_u16 (cpu, vd, i,
11358 aarch64_get_mem_u16 (cpu, address + i * 2));
11359 return;
11360
11361 case 2:
11362 /* LD1 {Vd.4s}, addr, #16 */
11363 /* LD1 {Vd.2s}, addr, #8 */
11364 for (i = 0; i < (all ? 4 : 2); i++)
11365 aarch64_set_vec_u32 (cpu, vd, i,
11366 aarch64_get_mem_u32 (cpu, address + i * 4));
11367 return;
11368
11369 case 3:
11370 /* LD1 {Vd.2d}, addr, #16 */
11371 /* LD1 {Vd.1d}, addr, #8 */
11372 for (i = 0; i < (all ? 2 : 1); i++)
11373 aarch64_set_vec_u64 (cpu, vd, i,
11374 aarch64_get_mem_u64 (cpu, address + i * 8));
11375 return;
11376 }
11377 }
11378
11379 /* Load multiple 1-element structures into two registers. */
11380 static void
11381 LD1_2 (sim_cpu *cpu, uint64_t address)
11382 {
11383 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
11384 So why have two different instructions ? There must be something
11385 wrong somewhere. */
11386 vec_load (cpu, address, 2);
11387 }
11388
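/* A possible answer to the FIXME above: architecturally LD2/LD3/LD4
   de-interleave, sending element i of structure j to element i of
   register Vd+j, whereas LD1 with multiple registers fills them back
   to back.  vec_load implements the back to back (LD1) behaviour in
   all cases; for LD2 {v0.8b, v1.8b} it acts as if

     for (i = 0; i < 16; i++)           .. 2 registers x 8 bytes
       reg[i >> 3].b[i & 7] = mem[i];   .. consecutive, not interleaved

   which is why the LD1 and LDn entry points coincide here.  */
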
11389 /* Load multiple 1-element structures into three registers. */
11390 static void
11391 LD1_3 (sim_cpu *cpu, uint64_t address)
11392 {
11393 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11394 So why have two different instructions ? There must be something
11395 wrong somewhere. */
11396 vec_load (cpu, address, 3);
11397 }
11398
11399 /* Load multiple 1-element structures into four registers. */
11400 static void
11401 LD1_4 (sim_cpu *cpu, uint64_t address)
11402 {
11403 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11404 So why have two different instructions ? There must be something
11405 wrong somewhere. */
11406 vec_load (cpu, address, 4);
11407 }
11408
11409 /* Store multiple N-element structures to N consecutive registers. */
11410 static void
11411 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11412 {
11413 int all = INSTR (30, 30);
11414 unsigned size = INSTR (11, 10);
11415 unsigned vd = INSTR (4, 0);
11416 unsigned i;
11417
11418 switch (size)
11419 {
11420 case 0: /* 8-bit operations. */
11421 if (all)
11422 for (i = 0; i < (16 * N); i++)
11423 aarch64_set_mem_u8
11424 (cpu, address + i,
11425 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11426 else
11427 for (i = 0; i < (8 * N); i++)
11428 aarch64_set_mem_u8
11429 (cpu, address + i,
11430 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11431 return;
11432
11433 case 1: /* 16-bit operations. */
11434 if (all)
11435 for (i = 0; i < (8 * N); i++)
11436 aarch64_set_mem_u16
11437 (cpu, address + i * 2,
11438 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11439 else
11440 for (i = 0; i < (4 * N); i++)
11441 aarch64_set_mem_u16
11442 (cpu, address + i * 2,
11443 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11444 return;
11445
11446 case 2: /* 32-bit operations. */
11447 if (all)
11448 for (i = 0; i < (4 * N); i++)
11449 aarch64_set_mem_u32
11450 (cpu, address + i * 4,
11451 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11452 else
11453 for (i = 0; i < (2 * N); i++)
11454 aarch64_set_mem_u32
11455 (cpu, address + i * 4,
11456 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11457 return;
11458
11459 case 3: /* 64-bit operations. */
11460 if (all)
11461 for (i = 0; i < (2 * N); i++)
11462 aarch64_set_mem_u64
11463 (cpu, address + i * 8,
11464 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11465 else
11466 for (i = 0; i < N; i++)
11467 aarch64_set_mem_u64
11468 (cpu, address + i * 8,
11469 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11470 return;
11471 }
11472 }
11473
11474 /* Store multiple 4-element structure to four consecutive registers. */
11475 static void
11476 ST4 (sim_cpu *cpu, uint64_t address)
11477 {
11478 vec_store (cpu, address, 4);
11479 }
11480
11481 /* Store multiple 3-element structures to three consecutive registers. */
11482 static void
11483 ST3 (sim_cpu *cpu, uint64_t address)
11484 {
11485 vec_store (cpu, address, 3);
11486 }
11487
11488 /* Store multiple 2-element structures to two consecutive registers. */
11489 static void
11490 ST2 (sim_cpu *cpu, uint64_t address)
11491 {
11492 vec_store (cpu, address, 2);
11493 }
11494
11495 /* Store multiple 1-element structures into one register. */
11496 static void
11497 ST1_1 (sim_cpu *cpu, uint64_t address)
11498 {
11499 int all = INSTR (30, 30);
11500 unsigned size = INSTR (11, 10);
11501 unsigned vd = INSTR (4, 0);
11502 unsigned i;
11503
11504 switch (size)
11505 {
11506 case 0:
11507 for (i = 0; i < (all ? 16 : 8); i++)
11508 aarch64_set_mem_u8 (cpu, address + i,
11509 aarch64_get_vec_u8 (cpu, vd, i));
11510 return;
11511
11512 case 1:
11513 for (i = 0; i < (all ? 8 : 4); i++)
11514 aarch64_set_mem_u16 (cpu, address + i * 2,
11515 aarch64_get_vec_u16 (cpu, vd, i));
11516 return;
11517
11518 case 2:
11519 for (i = 0; i < (all ? 4 : 2); i++)
11520 aarch64_set_mem_u32 (cpu, address + i * 4,
11521 aarch64_get_vec_u32 (cpu, vd, i));
11522 return;
11523
11524 case 3:
11525 for (i = 0; i < (all ? 2 : 1); i++)
11526 aarch64_set_mem_u64 (cpu, address + i * 8,
11527 aarch64_get_vec_u64 (cpu, vd, i));
11528 return;
11529 }
11530 }
11531
11532 /* Store multiple 1-element structures into two registers. */
11533 static void
11534 ST1_2 (sim_cpu *cpu, uint64_t address)
11535 {
11536 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11537 So why have two different instructions ? There must be
11538 something wrong somewhere. */
11539 vec_store (cpu, address, 2);
11540 }
11541
11542 /* Store multiple 1-element structures into three registers. */
11543 static void
11544 ST1_3 (sim_cpu *cpu, uint64_t address)
11545 {
11546 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11547 So why have two different instructions ? There must be
11548 something wrong somewhere. */
11549 vec_store (cpu, address, 3);
11550 }
11551
11552 /* Store multiple 1-element structures into four registers. */
11553 static void
11554 ST1_4 (sim_cpu *cpu, uint64_t address)
11555 {
11556 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11557 So why have two different instructions ? There must be
11558 something wrong somewhere. */
11559 vec_store (cpu, address, 4);
11560 }
11561
11562 static void
11563 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11564 {
11565 /* instr[31] = 0
11566 instr[30] = element selector 0=>half, 1=>all elements
11567 instr[29,24] = 00 1101
11568 instr[23] = 0=>simple, 1=>post
11569 instr[22] = 1
11570 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11571 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11572 11111 (immediate post inc)
11573 instr[15,14] = 11
11574 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11575 instr[12] = 0
11576 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11577 10=> word(s), 11=> double(d)
11578 instr[9,5] = address
11579 instr[4,0] = Vd */
11580
11581 unsigned full = INSTR (30, 30);
11582 unsigned vd = INSTR (4, 0);
11583 unsigned size = INSTR (11, 10);
11584 int i;
11585
11586 NYI_assert (29, 24, 0x0D);
11587 NYI_assert (22, 22, 1);
11588 NYI_assert (15, 14, 3);
11589 NYI_assert (12, 12, 0);
11590
11591 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11592 {
11593 case 0: /* LD1R. */
11594 switch (size)
11595 {
11596 case 0:
11597 {
11598 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11599 for (i = 0; i < (full ? 16 : 8); i++)
11600 aarch64_set_vec_u8 (cpu, vd, i, val);
11601 break;
11602 }
11603
11604 case 1:
11605 {
11606 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11607 for (i = 0; i < (full ? 8 : 4); i++)
11608 aarch64_set_vec_u16 (cpu, vd, i, val);
11609 break;
11610 }
11611
11612 case 2:
11613 {
11614 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11615 for (i = 0; i < (full ? 4 : 2); i++)
11616 aarch64_set_vec_u32 (cpu, vd, i, val);
11617 break;
11618 }
11619
11620 case 3:
11621 {
11622 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11623 for (i = 0; i < (full ? 2 : 1); i++)
11624 aarch64_set_vec_u64 (cpu, vd, i, val);
11625 break;
11626 }
11627
11628 default:
11629 HALT_UNALLOC;
11630 }
11631 break;
11632
11633 case 1: /* LD2R. */
11634 switch (size)
11635 {
11636 case 0:
11637 {
11638 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11639 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11640
11641 for (i = 0; i < (full ? 16 : 8); i++)
11642 {
	    aarch64_set_vec_u8 (cpu, vd, i, val1);
	    aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11645 }
11646 break;
11647 }
11648
11649 case 1:
11650 {
11651 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11652 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11653
11654 for (i = 0; i < (full ? 8 : 4); i++)
11655 {
	    aarch64_set_vec_u16 (cpu, vd, i, val1);
	    aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11658 }
11659 break;
11660 }
11661
11662 case 2:
11663 {
11664 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11665 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11666
11667 for (i = 0; i < (full ? 4 : 2); i++)
11668 {
	    aarch64_set_vec_u32 (cpu, vd, i, val1);
	    aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11671 }
11672 break;
11673 }
11674
11675 case 3:
11676 {
11677 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11678 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11679
11680 for (i = 0; i < (full ? 2 : 1); i++)
11681 {
	    aarch64_set_vec_u64 (cpu, vd, i, val1);
	    aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11684 }
11685 break;
11686 }
11687
11688 default:
11689 HALT_UNALLOC;
11690 }
11691 break;
11692
11693 case 2: /* LD3R. */
11694 switch (size)
11695 {
11696 case 0:
11697 {
11698 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11699 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11700 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11701
11702 for (i = 0; i < (full ? 16 : 8); i++)
11703 {
	    aarch64_set_vec_u8 (cpu, vd, i, val1);
	    aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11707 }
11708 }
11709 break;
11710
11711 case 1:
11712 {
	    uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
	    uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
	    uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11716
11717 for (i = 0; i < (full ? 8 : 4); i++)
11718 {
	    aarch64_set_vec_u16 (cpu, vd, i, val1);
	    aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11722 }
11723 }
11724 break;
11725
11726 case 2:
11727 {
11728 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11729 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11730 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11731
11732 for (i = 0; i < (full ? 4 : 2); i++)
11733 {
	    aarch64_set_vec_u32 (cpu, vd, i, val1);
	    aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11737 }
11738 }
11739 break;
11740
11741 case 3:
11742 {
11743 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11744 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11745 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11746
11747 for (i = 0; i < (full ? 2 : 1); i++)
11748 {
	    aarch64_set_vec_u64 (cpu, vd, i, val1);
	    aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11752 }
11753 }
11754 break;
11755
11756 default:
11757 HALT_UNALLOC;
11758 }
11759 break;
11760
11761 case 3: /* LD4R. */
11762 switch (size)
11763 {
11764 case 0:
11765 {
11766 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11767 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11768 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11769 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11770
11771 for (i = 0; i < (full ? 16 : 8); i++)
11772 {
	    aarch64_set_vec_u8 (cpu, vd, i, val1);
	    aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
	    aarch64_set_vec_u8 (cpu, vec_reg (vd, 3), i, val4);
11777 }
11778 }
11779 break;
11780
11781 case 1:
11782 {
	    uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
	    uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
	    uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
	    uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11787
11788 for (i = 0; i < (full ? 8 : 4); i++)
11789 {
	    aarch64_set_vec_u16 (cpu, vd, i, val1);
	    aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
	    aarch64_set_vec_u16 (cpu, vec_reg (vd, 3), i, val4);
11794 }
11795 }
11796 break;
11797
11798 case 2:
11799 {
11800 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11801 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11802 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11803 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11804
11805 for (i = 0; i < (full ? 4 : 2); i++)
11806 {
	    aarch64_set_vec_u32 (cpu, vd, i, val1);
	    aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
	    aarch64_set_vec_u32 (cpu, vec_reg (vd, 3), i, val4);
11811 }
11812 }
11813 break;
11814
11815 case 3:
11816 {
11817 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11818 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11819 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11820 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11821
11822 for (i = 0; i < (full ? 2 : 1); i++)
11823 {
	    aarch64_set_vec_u64 (cpu, vd, i, val1);
	    aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
	    aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
	    aarch64_set_vec_u64 (cpu, vec_reg (vd, 3), i, val4);
11828 }
11829 }
11830 break;
11831
11832 default:
11833 HALT_UNALLOC;
11834 }
11835 break;
11836
11837 default:
11838 HALT_UNALLOC;
11839 }
11840 }
11841
11842 static void
11843 do_vec_load_store (sim_cpu *cpu)
11844 {
11845 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11846
11847 instr[31] = 0
11848 instr[30] = element selector 0=>half, 1=>all elements
11849 instr[29,25] = 00110
11850 instr[24] = ?
11851 instr[23] = 0=>simple, 1=>post
11852 instr[22] = 0=>store, 1=>load
11853 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11854 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11855 11111 (immediate post inc)
11856 instr[15,12] = elements and destinations. eg for load:
11857 0000=>LD4 => load multiple 4-element to
11858 four consecutive registers
11859 0100=>LD3 => load multiple 3-element to
11860 three consecutive registers
11861 1000=>LD2 => load multiple 2-element to
11862 two consecutive registers
11863 0010=>LD1 => load multiple 1-element to
11864 four consecutive registers
11865 0110=>LD1 => load multiple 1-element to
11866 three consecutive registers
11867 1010=>LD1 => load multiple 1-element to
11868 two consecutive registers
11869 0111=>LD1 => load multiple 1-element to
11870 one register
		     1100=>LD1R,LD2R
		     1110=>LD3R,LD4R
11873 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11874 10=> word(s), 11=> double(d)
11875 instr[9,5] = Vn, can be SP
11876 instr[4,0] = Vd */
11877
11878 int post;
11879 int load;
11880 unsigned vn;
11881 uint64_t address;
11882 int type;
11883
11884 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11885 HALT_NYI;
11886
11887 type = INSTR (15, 12);
  if (type != 0xE && type != 0xC && INSTR (21, 21) != 0)
11889 HALT_NYI;
11890
11891 post = INSTR (23, 23);
11892 load = INSTR (22, 22);
11893 vn = INSTR (9, 5);
11894 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11895
11896 if (post)
11897 {
11898 unsigned vm = INSTR (20, 16);
11899
11900 if (vm == R31)
11901 {
	  unsigned sizeof_operation;
	  int replicate = 0;
11903
11904 switch (type)
11905 {
11906 case 0: sizeof_operation = 32; break;
11907 case 4: sizeof_operation = 24; break;
11908 case 8: sizeof_operation = 16; break;
11909
	    case 0xC:
	      /* LD1R or LD2R: one element loaded to each register.  */
	      replicate = 1;
	      sizeof_operation = INSTR (21, 21) ? 2 : 1;
	      sizeof_operation <<= INSTR (11, 10);
	      break;

	    case 0xE:
	      /* LD3R or LD4R: one element loaded to each register.  */
	      replicate = 1;
	      sizeof_operation = INSTR (21, 21) ? 4 : 3;
	      sizeof_operation <<= INSTR (11, 10);
	      break;
11919
11920 case 7:
11921 /* One register, immediate offset variant. */
11922 sizeof_operation = 8;
11923 break;
11924
11925 case 10:
11926 /* Two registers, immediate offset variant. */
11927 sizeof_operation = 16;
11928 break;
11929
11930 case 6:
11931 /* Three registers, immediate offset variant. */
11932 sizeof_operation = 24;
11933 break;
11934
11935 case 2:
11936 /* Four registers, immediate offset variant. */
11937 sizeof_operation = 32;
11938 break;
11939
11940 default:
11941 HALT_UNALLOC;
11942 }
11943
	  /* The replicating loads consume the same number of bytes
	     whatever the value of the Q bit.  */
	  if (INSTR (30, 30) && !replicate)
	    sizeof_operation *= 2;
11946
11947 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11948 }
11949 else
11950 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11951 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11952 }
11953 else
11954 {
11955 NYI_assert (20, 16, 0);
11956 }
11957
11958 if (load)
11959 {
11960 switch (type)
11961 {
11962 case 0: LD4 (cpu, address); return;
11963 case 4: LD3 (cpu, address); return;
11964 case 8: LD2 (cpu, address); return;
11965 case 2: LD1_4 (cpu, address); return;
11966 case 6: LD1_3 (cpu, address); return;
11967 case 10: LD1_2 (cpu, address); return;
11968 case 7: LD1_1 (cpu, address); return;
11969
11970 case 0xE:
11971 case 0xC: do_vec_LDnR (cpu, address); return;
11972
11973 default:
11974 HALT_NYI;
11975 }
11976 }
11977
11978 /* Stores. */
11979 switch (type)
11980 {
11981 case 0: ST4 (cpu, address); return;
11982 case 4: ST3 (cpu, address); return;
11983 case 8: ST2 (cpu, address); return;
11984 case 2: ST1_4 (cpu, address); return;
11985 case 6: ST1_3 (cpu, address); return;
11986 case 10: ST1_2 (cpu, address); return;
11987 case 7: ST1_1 (cpu, address); return;
11988 default:
11989 HALT_NYI;
11990 }
11991 }
11992
11993 static void
11994 dexLdSt (sim_cpu *cpu)
11995 {
11996 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11997 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11998 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
     bits [29,28] and [26] of a LS are the secondary dispatch vector.  */
12000 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12001
12002 switch (group2)
12003 {
12004 case LS_EXCL_000:
12005 dexLoadExclusive (cpu); return;
12006
12007 case LS_LIT_010:
12008 case LS_LIT_011:
12009 dexLoadLiteral (cpu); return;
12010
12011 case LS_OTHER_110:
12012 case LS_OTHER_111:
12013 dexLoadOther (cpu); return;
12014
12015 case LS_ADVSIMD_001:
12016 do_vec_load_store (cpu); return;
12017
12018 case LS_PAIR_100:
12019 dex_load_store_pair_gr (cpu); return;
12020
12021 case LS_PAIR_101:
12022 dex_load_store_pair_fp (cpu); return;
12023
12024 default:
12025 /* Should never reach here. */
12026 HALT_NYI;
12027 }
12028 }
12029
12030 /* Specific decode and execute for group Data Processing Register. */
12031
12032 static void
12033 dexLogicalShiftedRegister (sim_cpu *cpu)
12034 {
12035 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12036 instr[30,29] = op
     instr[28,24] = 01010
12038 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12039 instr[21] = N
12040 instr[20,16] = Rm
12041 instr[15,10] = count : must be 0xxxxx for 32 bit
12042 instr[9,5] = Rn
12043 instr[4,0] = Rd */
12044
12045 uint32_t size = INSTR (31, 31);
12046 Shift shiftType = INSTR (23, 22);
12047 uint32_t count = INSTR (15, 10);
12048
  /* 32 bit operations must have count[5] == 0,
     otherwise the instruction is UNALLOC.  */
12051 if (size == 0 && uimm (count, 5, 5))
12052 HALT_UNALLOC;
12053
12054 /* Dispatch on size:op:N. */
12055 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12056 {
12057 case 0: and32_shift (cpu, shiftType, count); return;
12058 case 1: bic32_shift (cpu, shiftType, count); return;
12059 case 2: orr32_shift (cpu, shiftType, count); return;
12060 case 3: orn32_shift (cpu, shiftType, count); return;
12061 case 4: eor32_shift (cpu, shiftType, count); return;
12062 case 5: eon32_shift (cpu, shiftType, count); return;
12063 case 6: ands32_shift (cpu, shiftType, count); return;
12064 case 7: bics32_shift (cpu, shiftType, count); return;
12065 case 8: and64_shift (cpu, shiftType, count); return;
12066 case 9: bic64_shift (cpu, shiftType, count); return;
    case 10: orr64_shift (cpu, shiftType, count); return;
    case 11: orn64_shift (cpu, shiftType, count); return;
    case 12: eor64_shift (cpu, shiftType, count); return;
    case 13: eon64_shift (cpu, shiftType, count); return;
    case 14: ands64_shift (cpu, shiftType, count); return;
    case 15: bics64_shift (cpu, shiftType, count); return;
12073 }
12074 }
12075
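/* Worked example: BICS x0, x1, x2 has size == 1, op == 11 and N == 1,
   so (INSTR (31, 29) << 1) | INSTR (21, 21) == (7 << 1) | 1 == 15,
   selecting bics64_shift.  */
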
12076 /* 32 bit conditional select. */
12077 static void
12078 csel32 (sim_cpu *cpu, CondCode cc)
12079 {
12080 unsigned rm = INSTR (20, 16);
12081 unsigned rn = INSTR (9, 5);
12082 unsigned rd = INSTR (4, 0);
12083
12084 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12085 testConditionCode (cpu, cc)
12086 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12087 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12088 }
12089
12090 /* 64 bit conditional select. */
12091 static void
12092 csel64 (sim_cpu *cpu, CondCode cc)
12093 {
12094 unsigned rm = INSTR (20, 16);
12095 unsigned rn = INSTR (9, 5);
12096 unsigned rd = INSTR (4, 0);
12097
12098 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12099 testConditionCode (cpu, cc)
12100 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12101 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12102 }
12103
12104 /* 32 bit conditional increment. */
12105 static void
12106 csinc32 (sim_cpu *cpu, CondCode cc)
12107 {
12108 unsigned rm = INSTR (20, 16);
12109 unsigned rn = INSTR (9, 5);
12110 unsigned rd = INSTR (4, 0);
12111
12112 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12113 testConditionCode (cpu, cc)
12114 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12115 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12116 }
12117
12118 /* 64 bit conditional increment. */
12119 static void
12120 csinc64 (sim_cpu *cpu, CondCode cc)
12121 {
12122 unsigned rm = INSTR (20, 16);
12123 unsigned rn = INSTR (9, 5);
12124 unsigned rd = INSTR (4, 0);
12125
12126 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12127 testConditionCode (cpu, cc)
12128 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12129 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12130 }
12131
12132 /* 32 bit conditional invert. */
12133 static void
12134 csinv32 (sim_cpu *cpu, CondCode cc)
12135 {
12136 unsigned rm = INSTR (20, 16);
12137 unsigned rn = INSTR (9, 5);
12138 unsigned rd = INSTR (4, 0);
12139
12140 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12141 testConditionCode (cpu, cc)
12142 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12143 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12144 }
12145
12146 /* 64 bit conditional invert. */
12147 static void
12148 csinv64 (sim_cpu *cpu, CondCode cc)
12149 {
12150 unsigned rm = INSTR (20, 16);
12151 unsigned rn = INSTR (9, 5);
12152 unsigned rd = INSTR (4, 0);
12153
12154 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12155 testConditionCode (cpu, cc)
12156 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12157 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12158 }
12159
12160 /* 32 bit conditional negate. */
12161 static void
12162 csneg32 (sim_cpu *cpu, CondCode cc)
12163 {
12164 unsigned rm = INSTR (20, 16);
12165 unsigned rn = INSTR (9, 5);
12166 unsigned rd = INSTR (4, 0);
12167
12168 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12169 testConditionCode (cpu, cc)
12170 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12171 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12172 }
12173
12174 /* 64 bit conditional negate. */
12175 static void
12176 csneg64 (sim_cpu *cpu, CondCode cc)
12177 {
12178 unsigned rm = INSTR (20, 16);
12179 unsigned rn = INSTR (9, 5);
12180 unsigned rd = INSTR (4, 0);
12181
12182 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12183 testConditionCode (cpu, cc)
12184 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12185 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12186 }
12187
12188 static void
12189 dexCondSelect (sim_cpu *cpu)
12190 {
  /* instr[28,21] = 11010100
     instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
     instr[30], instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
                                    100 ==> CSINV, 101 ==> CSNEG,
                                    _1_ ==> UNALLOC
     instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,12] = cond.  */
12199
12200 CondCode cc = INSTR (15, 12);
12201 uint32_t S = INSTR (29, 29);
12202 uint32_t op2 = INSTR (11, 10);
12203
12204 if (S == 1)
12205 HALT_UNALLOC;
12206
12207 if (op2 & 0x2)
12208 HALT_UNALLOC;
12209
12210 switch ((INSTR (31, 30) << 1) | op2)
12211 {
12212 case 0: csel32 (cpu, cc); return;
12213 case 1: csinc32 (cpu, cc); return;
12214 case 2: csinv32 (cpu, cc); return;
12215 case 3: csneg32 (cpu, cc); return;
12216 case 4: csel64 (cpu, cc); return;
12217 case 5: csinc64 (cpu, cc); return;
12218 case 6: csinv64 (cpu, cc); return;
12219 case 7: csneg64 (cpu, cc); return;
12220 }
12221 }
12222
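/* Common aliases decode through here: CSET w0, eq is
   CSINC w0, wzr, wzr, ne (size 0, op 0, op2 01 ==> case 1, csinc32),
   and CNEG x0, x1, cc is CSNEG x0, x1, x1, cs (size 1, op 1, op2 01
   ==> case 7, csneg64).  */
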
12223 /* Some helpers for counting leading 1 or 0 bits. */
12224
12225 /* Counts the number of leading bits which are the same
12226 in a 32 bit value in the range 1 to 32. */
12227 static uint32_t
12228 leading32 (uint32_t value)
12229 {
12230 int32_t mask= 0xffff0000;
12231 uint32_t count= 16; /* Counts number of bits set in mask. */
12232 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12233 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12234
12235 while (lo + 1 < hi)
12236 {
12237 int32_t test = (value & mask);
12238
12239 if (test == 0 || test == mask)
12240 {
12241 lo = count;
12242 count = (lo + hi) / 2;
12243 mask >>= (count - lo);
12244 }
12245 else
12246 {
12247 hi = count;
12248 count = (lo + hi) / 2;
12249 mask <<= hi - count;
12250 }
12251 }
12252
12253 if (lo != hi)
12254 {
12255 int32_t test;
12256
12257 mask >>= 1;
12258 test = (value & mask);
12259
12260 if (test == 0 || test == mask)
12261 count = hi;
12262 else
12263 count = lo;
12264 }
12265
12266 return count;
12267 }
12268
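/* Worked examples: leading32 (0xffffff80) finds 25 leading one bits,
   so cls32 below reports 24 (the count excludes the sign bit itself);
   leading32 (1) returns 31 and leading32 (0) returns 32.  */
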
12269 /* Counts the number of leading bits which are the same
12270 in a 64 bit value in the range 1 to 64. */
12271 static uint64_t
12272 leading64 (uint64_t value)
12273 {
12274 int64_t mask= 0xffffffff00000000LL;
12275 uint64_t count = 32; /* Counts number of bits set in mask. */
12276 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12277 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12278
12279 while (lo + 1 < hi)
12280 {
12281 int64_t test = (value & mask);
12282
12283 if (test == 0 || test == mask)
12284 {
12285 lo = count;
12286 count = (lo + hi) / 2;
12287 mask >>= (count - lo);
12288 }
12289 else
12290 {
12291 hi = count;
12292 count = (lo + hi) / 2;
12293 mask <<= hi - count;
12294 }
12295 }
12296
12297 if (lo != hi)
12298 {
12299 int64_t test;
12300
12301 mask >>= 1;
12302 test = (value & mask);
12303
12304 if (test == 0 || test == mask)
12305 count = hi;
12306 else
12307 count = lo;
12308 }
12309
12310 return count;
12311 }
12312
12313 /* Bit operations. */
12314 /* N.B register args may not be SP. */
12315
12316 /* 32 bit count leading sign bits. */
12317 static void
12318 cls32 (sim_cpu *cpu)
12319 {
12320 unsigned rn = INSTR (9, 5);
12321 unsigned rd = INSTR (4, 0);
12322
12323 /* N.B. the result needs to exclude the leading bit. */
12324 aarch64_set_reg_u64
12325 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12326 }
12327
12328 /* 64 bit count leading sign bits. */
12329 static void
12330 cls64 (sim_cpu *cpu)
12331 {
12332 unsigned rn = INSTR (9, 5);
12333 unsigned rd = INSTR (4, 0);
12334
12335 /* N.B. the result needs to exclude the leading bit. */
12336 aarch64_set_reg_u64
12337 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12338 }
12339
12340 /* 32 bit count leading zero bits. */
12341 static void
12342 clz32 (sim_cpu *cpu)
12343 {
12344 unsigned rn = INSTR (9, 5);
12345 unsigned rd = INSTR (4, 0);
12346 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12347
12348 /* if the sign (top) bit is set then the count is 0. */
12349 if (pick32 (value, 31, 31))
12350 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12351 else
12352 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12353 }
12354
12355 /* 64 bit count leading zero bits. */
12356 static void
12357 clz64 (sim_cpu *cpu)
12358 {
12359 unsigned rn = INSTR (9, 5);
12360 unsigned rd = INSTR (4, 0);
12361 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12362
12363 /* if the sign (top) bit is set then the count is 0. */
12364 if (pick64 (value, 63, 63))
12365 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12366 else
12367 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12368 }
12369
12370 /* 32 bit reverse bits. */
12371 static void
12372 rbit32 (sim_cpu *cpu)
12373 {
12374 unsigned rn = INSTR (9, 5);
12375 unsigned rd = INSTR (4, 0);
12376 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12377 uint32_t result = 0;
12378 int i;
12379
12380 for (i = 0; i < 32; i++)
12381 {
12382 result <<= 1;
12383 result |= (value & 1);
12384 value >>= 1;
12385 }
12386 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12387 }
12388
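/* For reference, a branch-free formulation equivalent to the loop in
   rbit32 above (an illustrative sketch, not used by the simulator):
   swap adjacent bits, then 2 bit pairs, nibbles, bytes and finally
   the two half words.  */
#if 0
static uint32_t
rbit32_swar (uint32_t v)
{
  v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
  v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
  v = ((v >> 4) & 0x0f0f0f0f) | ((v & 0x0f0f0f0f) << 4);
  v = ((v >> 8) & 0x00ff00ff) | ((v & 0x00ff00ff) << 8);
  return (v >> 16) | (v << 16);
}
#endif
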
12389 /* 64 bit reverse bits. */
12390 static void
12391 rbit64 (sim_cpu *cpu)
12392 {
12393 unsigned rn = INSTR (9, 5);
12394 unsigned rd = INSTR (4, 0);
12395 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12396 uint64_t result = 0;
12397 int i;
12398
12399 for (i = 0; i < 64; i++)
12400 {
12401 result <<= 1;
12402 result |= (value & 1UL);
12403 value >>= 1;
12404 }
12405 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12406 }
12407
12408 /* 32 bit reverse bytes. */
12409 static void
12410 rev32 (sim_cpu *cpu)
12411 {
12412 unsigned rn = INSTR (9, 5);
12413 unsigned rd = INSTR (4, 0);
12414 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12415 uint32_t result = 0;
12416 int i;
12417
12418 for (i = 0; i < 4; i++)
12419 {
12420 result <<= 8;
12421 result |= (value & 0xff);
12422 value >>= 8;
12423 }
12424 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12425 }
12426
12427 /* 64 bit reverse bytes. */
12428 static void
12429 rev64 (sim_cpu *cpu)
12430 {
12431 unsigned rn = INSTR (9, 5);
12432 unsigned rd = INSTR (4, 0);
12433 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12434 uint64_t result = 0;
12435 int i;
12436
12437 for (i = 0; i < 8; i++)
12438 {
12439 result <<= 8;
12440 result |= (value & 0xffULL);
12441 value >>= 8;
12442 }
12443 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12444 }
12445
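/* 64 bit reverse bytes within each word.  The 64 bit REV32 form
   byte-reverses each of the two 32 bit words in place; this differs
   from rev32 above, which byte-reverses a single 32 bit value and
   zeroes the upper word.  Dispatched from dexDataProc1Source below.  */
static void
revw64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  /* Collect byte lane i of each word, shifting earlier lanes up,
     so each word ends up byte-reversed in place.  */
  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
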
12446 /* 32 bit reverse shorts. */
12447 /* N.B. this reverses the order of the bytes in each half word. */
12448 static void
12449 revh32 (sim_cpu *cpu)
12450 {
12451 unsigned rn = INSTR (9, 5);
12452 unsigned rd = INSTR (4, 0);
12453 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12454 uint32_t result = 0;
12455 int i;
12456
12457 for (i = 0; i < 2; i++)
12458 {
12459 result <<= 8;
12460 result |= (value & 0x00ff00ff);
12461 value >>= 8;
12462 }
12463 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12464 }
12465
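/* Example: revh32 (0xAABBCCDD) yields 0xBBAADDCC -- after the two
   passes each 16 bit half word has had its two bytes swapped.  */
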
12466 /* 64 bit reverse shorts. */
12467 /* N.B. this reverses the order of the bytes in each half word. */
12468 static void
12469 revh64 (sim_cpu *cpu)
12470 {
12471 unsigned rn = INSTR (9, 5);
12472 unsigned rd = INSTR (4, 0);
12473 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12474 uint64_t result = 0;
12475 int i;
12476
12477 for (i = 0; i < 2; i++)
12478 {
12479 result <<= 8;
12480 result |= (value & 0x00ff00ff00ff00ffULL);
12481 value >>= 8;
12482 }
12483 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12484 }
12485
12486 static void
12487 dexDataProc1Source (sim_cpu *cpu)
12488 {
12489 /* instr[30] = 1
12490 instr[28,21] = 11010110
12491 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12492 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12493 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12494 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12495 000010 ==> REV (32 bit) / REV32 (64 bit), 000011 ==> REV (64 bit only)
12496 000100 ==> CLZ, 000101 ==> CLS
12497 ow ==> UNALLOC
12498 instr[9,5] = rn : may not be SP
12499 instr[4,0] = rd : may not be SP. */
12500
12501 uint32_t S = INSTR (29, 29);
12502 uint32_t opcode2 = INSTR (20, 16);
12503 uint32_t opcode = INSTR (15, 10);
12504 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12505
12506 if (S == 1)
12507 HALT_UNALLOC;
12508
12509 if (opcode2 != 0)
12510 HALT_UNALLOC;
12511
12512 if (opcode & 0x38)
12513 HALT_UNALLOC;
12514
12515 switch (dispatch)
12516 {
12517 case 0: rbit32 (cpu); return;
12518 case 1: revh32 (cpu); return;
12519 case 2: rev32 (cpu); return;
12520 case 4: clz32 (cpu); return;
12521 case 5: cls32 (cpu); return;
12522 case 8: rbit64 (cpu); return;
12523 case 9: revh64 (cpu); return;
12524 case 10: revw64 (cpu); return;
12525 case 11: rev64 (cpu); return;
12526 case 12: clz64 (cpu); return;
12527 case 13: cls64 (cpu); return;
12528 default: HALT_UNALLOC;
12529 }
12530 }
12531
12532 /* Variable shift.
12533 Shifts by count supplied in register.
12534 N.B. register args may not be SP.
12535 These all use the shifted auxiliary function for
12536 simplicity and clarity. Writing the actual shift
12537 inline would avoid a branch and so be faster but
12538 would also necessitate getting signs right. */
12539
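/* For reference, a sketch of the sign handling an inline shift would
   need (illustrative only; the simulator uses the shifted32 helper
   instead).  This assumes the host does an arithmetic right shift on
   signed values, which ISO C leaves implementation-defined.  */
#if 0
static uint32_t
asr32_inline (uint32_t value, unsigned amount)
{
  return (uint32_t) (((int32_t) value) >> (amount & 0x1f));
}
#endif
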
12540 /* 32 bit arithmetic shift right. */
12541 static void
12542 asrv32 (sim_cpu *cpu)
12543 {
12544 unsigned rm = INSTR (20, 16);
12545 unsigned rn = INSTR (9, 5);
12546 unsigned rd = INSTR (4, 0);
12547
12548 aarch64_set_reg_u64
12549 (cpu, rd, NO_SP,
12550 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12551 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12552 }
12553
12554 /* 64 bit arithmetic shift right. */
12555 static void
12556 asrv64 (sim_cpu *cpu)
12557 {
12558 unsigned rm = INSTR (20, 16);
12559 unsigned rn = INSTR (9, 5);
12560 unsigned rd = INSTR (4, 0);
12561
12562 aarch64_set_reg_u64
12563 (cpu, rd, NO_SP,
12564 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12565 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12566 }
12567
12568 /* 32 bit logical shift left. */
12569 static void
12570 lslv32 (sim_cpu *cpu)
12571 {
12572 unsigned rm = INSTR (20, 16);
12573 unsigned rn = INSTR (9, 5);
12574 unsigned rd = INSTR (4, 0);
12575
12576 aarch64_set_reg_u64
12577 (cpu, rd, NO_SP,
12578 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12579 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12580 }
12581
12582 /* 64 bit logical shift left. */
12583 static void
12584 lslv64 (sim_cpu *cpu)
12585 {
12586 unsigned rm = INSTR (20, 16);
12587 unsigned rn = INSTR (9, 5);
12588 unsigned rd = INSTR (4, 0);
12589
12590 aarch64_set_reg_u64
12591 (cpu, rd, NO_SP,
12592 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12593 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12594 }
12595
12596 /* 32 bit logical shift right. */
12597 static void
12598 lsrv32 (sim_cpu *cpu)
12599 {
12600 unsigned rm = INSTR (20, 16);
12601 unsigned rn = INSTR (9, 5);
12602 unsigned rd = INSTR (4, 0);
12603
12604 aarch64_set_reg_u64
12605 (cpu, rd, NO_SP,
12606 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12607 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12608 }
12609
12610 /* 64 bit logical shift right. */
12611 static void
12612 lsrv64 (sim_cpu *cpu)
12613 {
12614 unsigned rm = INSTR (20, 16);
12615 unsigned rn = INSTR (9, 5);
12616 unsigned rd = INSTR (4, 0);
12617
12618 aarch64_set_reg_u64
12619 (cpu, rd, NO_SP,
12620 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12621 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12622 }
12623
12624 /* 32 bit rotate right. */
12625 static void
12626 rorv32 (sim_cpu *cpu)
12627 {
12628 unsigned rm = INSTR (20, 16);
12629 unsigned rn = INSTR (9, 5);
12630 unsigned rd = INSTR (4, 0);
12631
12632 aarch64_set_reg_u64
12633 (cpu, rd, NO_SP,
12634 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12635 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12636 }
12637
12638 /* 64 bit rotate right. */
12639 static void
12640 rorv64 (sim_cpu *cpu)
12641 {
12642 unsigned rm = INSTR (20, 16);
12643 unsigned rn = INSTR (9, 5);
12644 unsigned rd = INSTR (4, 0);
12645
12646 aarch64_set_reg_u64
12647 (cpu, rd, NO_SP,
12648 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12649 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12650 }
12651
12652
12653 /* Divide. */
12654
12655 /* 32 bit signed divide. */
12656 static void
12657 sdiv32 (sim_cpu *cpu)
12658 {
12659 unsigned rm = INSTR (20, 16);
12660 unsigned rn = INSTR (9, 5);
12661 unsigned rd = INSTR (4, 0);
12662 /* N.B. the pseudo-code does the divide using 64 bit data. */
12663 /* TODO : check that this rounds towards zero as required. */
12664 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12665 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12666
12667 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12668 divisor ? ((int32_t) (dividend / divisor)) : 0);
12669 }
12670
12671 /* 64 bit signed divide. */
12672 static void
12673 sdiv64 (sim_cpu *cpu)
12674 {
12675 unsigned rm = INSTR (20, 16);
12676 unsigned rn = INSTR (9, 5);
12677 unsigned rd = INSTR (4, 0);
12678
12679 /* TODO : check that this rounds towards zero as required. */
12680 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12681
12682 aarch64_set_reg_s64
12683 (cpu, rd, NO_SP,
12684 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12685 }
12686
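/* A note on the TODOs above: ISO C99 and later define integer
   division as truncating toward zero, e.g. -7 / 2 == -3, which is
   the rounding SDIV requires.  The remaining host-side wrinkle is
   INT64_MIN / -1 in the 64 bit case: the architecture defines the
   result as INT64_MIN, but the C division overflows.  */
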
12687 /* 32 bit unsigned divide. */
12688 static void
12689 udiv32 (sim_cpu *cpu)
12690 {
12691 unsigned rm = INSTR (20, 16);
12692 unsigned rn = INSTR (9, 5);
12693 unsigned rd = INSTR (4, 0);
12694
12695 /* N.B. the pseudo-code does the divide using 64 bit data. */
12696 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12697 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12698
12699 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12700 divisor ? (uint32_t) (dividend / divisor) : 0);
12701 }
12702
12703 /* 64 bit unsigned divide. */
12704 static void
12705 udiv64 (sim_cpu *cpu)
12706 {
12707 unsigned rm = INSTR (20, 16);
12708 unsigned rn = INSTR (9, 5);
12709 unsigned rd = INSTR (4, 0);
12710
12711 /* TODO : check that this rounds towards zero as required. */
12712 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12713
12714 aarch64_set_reg_u64
12715 (cpu, rd, NO_SP,
12716 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12717 }
12718
12719 static void
12720 dexDataProc2Source (sim_cpu *cpu)
12721 {
12722 /* assert instr[30] == 0
12723 instr[28,21] == 11010110
12724 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12725 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12726 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12727 001000 ==> LSLV, 001001 ==> LSRV
12728 001010 ==> ASRV, 001011 ==> RORV
12729 ow ==> UNALLOC. */
12730
12731 uint32_t dispatch;
12732 uint32_t S = INSTR (29, 29);
12733 uint32_t opcode = INSTR (15, 10);
12734
12735 if (S == 1)
12736 HALT_UNALLOC;
12737
12738 if (opcode & 0x34)
12739 HALT_UNALLOC;
12740
12741 dispatch = ( (INSTR (31, 31) << 3)
12742 | (uimm (opcode, 3, 3) << 2)
12743 | uimm (opcode, 1, 0));
12744 switch (dispatch)
12745 {
12746 case 2: udiv32 (cpu); return;
12747 case 3: sdiv32 (cpu); return;
12748 case 4: lslv32 (cpu); return;
12749 case 5: lsrv32 (cpu); return;
12750 case 6: asrv32 (cpu); return;
12751 case 7: rorv32 (cpu); return;
12752 case 10: udiv64 (cpu); return;
12753 case 11: sdiv64 (cpu); return;
12754 case 12: lslv64 (cpu); return;
12755 case 13: lsrv64 (cpu); return;
12756 case 14: asrv64 (cpu); return;
12757 case 15: rorv64 (cpu); return;
12758 default: HALT_UNALLOC;
12759 }
12760 }
12761
12762
12763 /* Multiply. */
12764
12765 /* 32 bit multiply and add. */
12766 static void
12767 madd32 (sim_cpu *cpu)
12768 {
12769 unsigned rm = INSTR (20, 16);
12770 unsigned ra = INSTR (14, 10);
12771 unsigned rn = INSTR (9, 5);
12772 unsigned rd = INSTR (4, 0);
12773
12774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12775 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12776 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12777 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12778 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12779 }
12780
12781 /* 64 bit multiply and add. */
12782 static void
12783 madd64 (sim_cpu *cpu)
12784 {
12785 unsigned rm = INSTR (20, 16);
12786 unsigned ra = INSTR (14, 10);
12787 unsigned rn = INSTR (9, 5);
12788 unsigned rd = INSTR (4, 0);
12789
12790 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12791 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12792 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12793 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12794 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12795 }
12796
12797 /* 32 bit multiply and sub. */
12798 static void
12799 msub32 (sim_cpu *cpu)
12800 {
12801 unsigned rm = INSTR (20, 16);
12802 unsigned ra = INSTR (14, 10);
12803 unsigned rn = INSTR (9, 5);
12804 unsigned rd = INSTR (4, 0);
12805
12806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12807 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12808 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12809 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12810 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12811 }
12812
12813 /* 64 bit multiply and sub. */
12814 static void
12815 msub64 (sim_cpu *cpu)
12816 {
12817 unsigned rm = INSTR (20, 16);
12818 unsigned ra = INSTR (14, 10);
12819 unsigned rn = INSTR (9, 5);
12820 unsigned rd = INSTR (4, 0);
12821
12822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12823 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12824 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12825 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12826 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12827 }
12828
12829 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12830 static void
12831 smaddl (sim_cpu *cpu)
12832 {
12833 unsigned rm = INSTR (20, 16);
12834 unsigned ra = INSTR (14, 10);
12835 unsigned rn = INSTR (9, 5);
12836 unsigned rd = INSTR (4, 0);
12837
12838 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12839 obtain a 64 bit product. */
12840 aarch64_set_reg_s64
12841 (cpu, rd, NO_SP,
12842 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12843 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12844 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12845 }
12846
12847 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12848 static void
12849 smsubl (sim_cpu *cpu)
12850 {
12851 unsigned rm = INSTR (20, 16);
12852 unsigned ra = INSTR (14, 10);
12853 unsigned rn = INSTR (9, 5);
12854 unsigned rd = INSTR (4, 0);
12855
12856 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12857 obtain a 64 bit product. */
12858 aarch64_set_reg_s64
12859 (cpu, rd, NO_SP,
12860 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12861 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12862 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12863 }
12864
12865 /* Integer Multiply/Divide. */
12866
12867 /* First some macros and a helper function. */
12868 /* Macros to test or access elements of 64 bit words. */
12869
12870 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12871 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12872 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12873 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12874 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12875 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12876
12877 /* Offset of sign bit in 64 bit signed integer. */
12878 #define SIGN_SHIFT_U64 63
12879 /* The sign bit itself -- also identifies the minimum negative int value. */
12880 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
12881 /* Return true if a 64 bit signed int presented as an unsigned int is the
12882 most negative value. */
12883 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12884 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12885 int has its sign bit set. */
12886 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12887 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12888 an unsigned int has its sign bit set or not. */
12889 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12890 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12891 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12892
12893 /* Multiply two 64 bit ints and return
12894    the hi 64 bits of the 128 bit product. */
12895
12896 static uint64_t
12897 mul64hi (uint64_t value1, uint64_t value2)
12898 {
12899 uint64_t resultmid1;
12900 uint64_t result;
12901 uint64_t value1_lo = lowWordToU64 (value1);
12902 uint64_t value1_hi = highWordToU64 (value1);
12903 uint64_t value2_lo = lowWordToU64 (value2);
12904 uint64_t value2_hi = highWordToU64 (value2);
12905
12906 /* Cross-multiply and collect results. */
12907 uint64_t xproductlo = value1_lo * value2_lo;
12908 uint64_t xproductmid1 = value1_lo * value2_hi;
12909 uint64_t xproductmid2 = value1_hi * value2_lo;
12910 uint64_t xproducthi = value1_hi * value2_hi;
12911 uint64_t carry = 0;
12912 /* Start accumulating 64 bit results. */
12913 /* Drop bottom half of lowest cross-product. */
12914 uint64_t resultmid = xproductlo >> 32;
12915 /* Add in middle products. */
12916 resultmid = resultmid + xproductmid1;
12917
12918 /* Check for overflow. */
12919 if (resultmid < xproductmid1)
12920 /* Carry over 1 into top cross-product. */
12921 carry++;
12922
12923 resultmid1 = resultmid + xproductmid2;
12924
12925 /* Check for overflow. */
12926 if (resultmid1 < xproductmid2)
12927 /* Carry over 1 into top cross-product. */
12928 carry++;
12929
12930 /* Drop lowest 32 bits of middle cross-product. */
12931 result = resultmid1 >> 32;
12932
12933 /* Add in the top cross-product and any carry. */
12934 result += xproducthi + carry;
12935
12936 return result;
12937 }
12938
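/* On hosts that provide a 128 bit integer type the routine above can
   be cross-checked directly; unsigned __int128 is a GCC/Clang
   extension, so this sketch is illustrative only.  */
#if 0
static uint64_t
mul64hi_check (uint64_t value1, uint64_t value2)
{
  return (uint64_t) (((unsigned __int128) value1
		      * (unsigned __int128) value2) >> 64);
}
#endif
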
12939 /* Signed multiply high, source, source2 :
12940 64 bit, dest <-- high 64-bit of result. */
12941 static void
12942 smulh (sim_cpu *cpu)
12943 {
12944 uint64_t uresult;
12945 int64_t result;
12946 unsigned rm = INSTR (20, 16);
12947 unsigned rn = INSTR (9, 5);
12948 unsigned rd = INSTR (4, 0);
12949 GReg ra = INSTR (14, 10);
12950 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12951 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12952 uint64_t uvalue1;
12953 uint64_t uvalue2;
12954 int64_t signum = 1;
12955
12956 if (ra != R31)
12957 HALT_UNALLOC;
12958
12959 /* Convert to unsigned and use the unsigned mul64hi routine,
12960 then fix the sign up afterwards. */
12961 if (value1 < 0)
12962 {
12963 signum *= -1L;
12964 uvalue1 = - (uint64_t) value1; /* Unsigned negate avoids INT64_MIN overflow. */
12965 }
12966 else
12967 {
12968 uvalue1 = value1;
12969 }
12970
12971 if (value2 < 0)
12972 {
12973 signum *= -1L;
12974 uvalue2 = - (uint64_t) value2;
12975 }
12976 else
12977 {
12978 uvalue2 = value2;
12979 }
12980
12981 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12982 uresult = mul64hi (uvalue1, uvalue2);
12983 if (signum < 0)
    /* Negate the 128 bit product: complement the high half and add
       one if the low half (uvalue1 * uvalue2) is zero.  */
    uresult = ~uresult + (uvalue1 * uvalue2 == 0);
12984 result = uresult;
12985
12986 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12987 }
12988
12989 /* Unsigned multiply add long -- source, source2 :
12990 32 bit, source3 : 64 bit. */
12991 static void
12992 umaddl (sim_cpu *cpu)
12993 {
12994 unsigned rm = INSTR (20, 16);
12995 unsigned ra = INSTR (14, 10);
12996 unsigned rn = INSTR (9, 5);
12997 unsigned rd = INSTR (4, 0);
12998
12999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13000 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13001 obtain a 64 bit product. */
13002 aarch64_set_reg_u64
13003 (cpu, rd, NO_SP,
13004 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13005 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13006 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13007 }
13008
13009 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13010 static void
13011 umsubl (sim_cpu *cpu)
13012 {
13013 unsigned rm = INSTR (20, 16);
13014 unsigned ra = INSTR (14, 10);
13015 unsigned rn = INSTR (9, 5);
13016 unsigned rd = INSTR (4, 0);
13017
13018 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13019 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13020 obtain a 64 bit product. */
13021 aarch64_set_reg_u64
13022 (cpu, rd, NO_SP,
13023 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13024 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13025 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13026 }
13027
13028 /* Unsigned multiply high, source, source2 :
13029 64 bit, dest <-- high 64-bit of result. */
13030 static void
13031 umulh (sim_cpu *cpu)
13032 {
13033 unsigned rm = INSTR (20, 16);
13034 unsigned rn = INSTR (9, 5);
13035 unsigned rd = INSTR (4, 0);
13036 GReg ra = INSTR (14, 10);
13037
13038 if (ra != R31)
13039 HALT_UNALLOC;
13040
13041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13042 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13043 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13044 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13045 }
13046
13047 static void
13048 dexDataProc3Source (sim_cpu *cpu)
13049 {
13050 /* assert instr[28,24] == 11011. */
13051 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13052 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13053 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13054 instr[15] = o0 : 0/1 ==> ok
13055 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13056 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13057 0100 ==> SMULH, (64 bit only)
13058 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13059 1100 ==> UMULH (64 bit only)
13060 ow ==> UNALLOC. */
13061
13062 uint32_t dispatch;
13063 uint32_t size = INSTR (31, 31);
13064 uint32_t op54 = INSTR (30, 29);
13065 uint32_t op31 = INSTR (23, 21);
13066 uint32_t o0 = INSTR (15, 15);
13067
13068 if (op54 != 0)
13069 HALT_UNALLOC;
13070
13071 if (size == 0)
13072 {
13073 if (op31 != 0)
13074 HALT_UNALLOC;
13075
13076 if (o0 == 0)
13077 madd32 (cpu);
13078 else
13079 msub32 (cpu);
13080 return;
13081 }
13082
13083 dispatch = (op31 << 1) | o0;
13084
13085 switch (dispatch)
13086 {
13087 case 0: madd64 (cpu); return;
13088 case 1: msub64 (cpu); return;
13089 case 2: smaddl (cpu); return;
13090 case 3: smsubl (cpu); return;
13091 case 4: smulh (cpu); return;
13092 case 10: umaddl (cpu); return;
13093 case 11: umsubl (cpu); return;
13094 case 12: umulh (cpu); return;
13095 default: HALT_UNALLOC;
13096 }
13097 }
13098
13099 static void
13100 dexDPReg (sim_cpu *cpu)
13101 {
13102 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13103 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13104 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13105 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13106
13107 switch (group2)
13108 {
13109 case DPREG_LOG_000:
13110 case DPREG_LOG_001:
13111 dexLogicalShiftedRegister (cpu); return;
13112
13113 case DPREG_ADDSHF_010:
13114 dexAddSubtractShiftedRegister (cpu); return;
13115
13116 case DPREG_ADDEXT_011:
13117 dexAddSubtractExtendedRegister (cpu); return;
13118
13119 case DPREG_ADDCOND_100:
13120 {
13121 /* This set bundles a variety of different operations. */
13122 /* Check for: */
13123 /* 1) add/sub w carry. */
13124 uint32_t mask1 = 0x1FE00000U;
13125 uint32_t val1 = 0x1A000000U;
13126 /* 2) cond compare register/immediate. */
13127 uint32_t mask2 = 0x1FE00000U;
13128 uint32_t val2 = 0x1A400000U;
13129 /* 3) cond select. */
13130 uint32_t mask3 = 0x1FE00000U;
13131 uint32_t val3 = 0x1A800000U;
13132 /* 4) data proc 1/2 source. */
13133 uint32_t mask4 = 0x1FE00000U;
13134 uint32_t val4 = 0x1AC00000U;
13135
13136 if ((aarch64_get_instr (cpu) & mask1) == val1)
13137 dexAddSubtractWithCarry (cpu);
13138
13139 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13140 CondCompare (cpu);
13141
13142 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13143 dexCondSelect (cpu);
13144
13145 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13146 {
13147 /* Bit 30 is clear for data proc 2 source
13148 and set for data proc 1 source. */
13149 if (aarch64_get_instr (cpu) & (1U << 30))
13150 dexDataProc1Source (cpu);
13151 else
13152 dexDataProc2Source (cpu);
13153 }
13154
13155 else
13156 /* Should not reach here. */
13157 HALT_NYI;
13158
13159 return;
13160 }
13161
13162 case DPREG_3SRC_110:
13163 dexDataProc3Source (cpu); return;
13164
13165 case DPREG_UNALLOC_101:
13166 HALT_UNALLOC;
13167
13168 case DPREG_3SRC_111:
13169 dexDataProc3Source (cpu); return;
13170
13171 default:
13172 /* Should never reach here. */
13173 HALT_NYI;
13174 }
13175 }
13176
13177 /* Unconditional Branch immediate.
13178 Offset is a PC-relative byte offset in the range +/- 128MiB.
13179 The offset is assumed to be raw from the decode, i.e. the
13180 simulator is expected to scale it from a word offset to a byte offset. */
13181
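/* For example, the most negative imm26 field value, 0x2000000,
   sign-extends to -2^25 words; scaled by 4 that is -2^27 bytes,
   i.e. -128MiB, which is where the +/- 128MiB range comes from.  */
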
13182 /* Unconditional branch. */
13183 static void
13184 buc (sim_cpu *cpu, int32_t offset)
13185 {
13186 aarch64_set_next_PC_by_offset (cpu, offset);
13187 }
13188
13189 static unsigned stack_depth = 0;
13190
13191 /* Unconditional branch and link -- writes return PC to LR. */
13192 static void
13193 bl (sim_cpu *cpu, int32_t offset)
13194 {
13195 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13196 aarch64_save_LR (cpu);
13197 aarch64_set_next_PC_by_offset (cpu, offset);
13198
13199 if (TRACE_BRANCH_P (cpu))
13200 {
13201 ++ stack_depth;
13202 TRACE_BRANCH (cpu,
13203 " %*scall %" PRIx64 " [%s]"
13204 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13205 stack_depth, " ", aarch64_get_next_PC (cpu),
13206 aarch64_get_func (CPU_STATE (cpu),
13207 aarch64_get_next_PC (cpu)),
13208 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13209 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13210 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13211 );
13212 }
13213 }
13214
13215 /* Unconditional Branch register.
13216 Branch/return address is in source register. */
13217
13218 /* Unconditional branch. */
13219 static void
13220 br (sim_cpu *cpu)
13221 {
13222 unsigned rn = INSTR (9, 5);
13223 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13224 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13225 }
13226
13227 /* Unconditional branch and link -- writes return PC to LR. */
13228 static void
13229 blr (sim_cpu *cpu)
13230 {
13231 unsigned rn = INSTR (9, 5);
13232
13233 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13234 /* The pseudo code in the spec says we update LR before fetching
13235 the value from rn. */
13236 aarch64_save_LR (cpu);
13237 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13238
13239 if (TRACE_BRANCH_P (cpu))
13240 {
13241 ++ stack_depth;
13242 TRACE_BRANCH (cpu,
13243 " %*scall %" PRIx64 " [%s]"
13244 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13245 stack_depth, " ", aarch64_get_next_PC (cpu),
13246 aarch64_get_func (CPU_STATE (cpu),
13247 aarch64_get_next_PC (cpu)),
13248 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13249 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13250 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13251 );
13252 }
13253 }
13254
13255 /* Return -- the assembler will default the source to LR. This is
13256 functionally equivalent to br but, presumably, unlike br it
13257 side-effects the branch predictor. */
13258 static void
13259 ret (sim_cpu *cpu)
13260 {
13261 unsigned rn = INSTR (9, 5);
13262 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13263
13264 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13265 if (TRACE_BRANCH_P (cpu))
13266 {
13267 TRACE_BRANCH (cpu,
13268 " %*sreturn [result: %" PRIx64 "]",
13269 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13270 -- stack_depth;
13271 }
13272 }
13273
13274 /* NOP -- we implement this and call it from the decode in case we
13275 want to intercept it later. */
13276
13277 static void
13278 nop (sim_cpu *cpu)
13279 {
13280 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13281 }
13282
13283 /* Data synchronization barrier. */
13284
13285 static void
13286 dsb (sim_cpu *cpu)
13287 {
13288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13289 }
13290
13291 /* Data memory barrier. */
13292
13293 static void
13294 dmb (sim_cpu *cpu)
13295 {
13296 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13297 }
13298
13299 /* Instruction synchronization barrier. */
13300
13301 static void
13302 isb (sim_cpu *cpu)
13303 {
13304 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13305 }
13306
13307 static void
13308 dexBranchImmediate (sim_cpu *cpu)
13309 {
13310 /* assert instr[30,26] == 00101
13311 instr[31] ==> 0 == B, 1 == BL
13312 instr[25,0] == imm26 branch offset counted in words. */
13313
13314 uint32_t top = INSTR (31, 31);
13315 /* We have a 26 bit signed word offset which we need to pass to the
13316 execute routine as a signed byte offset. */
13317 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13318
13319 if (top)
13320 bl (cpu, offset);
13321 else
13322 buc (cpu, offset);
13323 }
13324
13325 /* Control Flow. */
13326
13327 /* Conditional branch
13328
13329 Offset is a PC-relative byte offset in the range +/- 1MiB; pos is
13330 a bit position in the range 0 .. 63.
13331
13332 cc is a CondCode enum value as pulled out of the decode.
13333
13334 N.B. any offset register (source) can only be Xn or Wn. */
13335
13336 static void
13337 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13338 {
13339 /* The test returns TRUE if CC is met. */
13340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13341 if (testConditionCode (cpu, cc))
13342 aarch64_set_next_PC_by_offset (cpu, offset);
13343 }
13344
13345 /* 32 bit branch on register non-zero. */
13346 static void
13347 cbnz32 (sim_cpu *cpu, int32_t offset)
13348 {
13349 unsigned rt = INSTR (4, 0);
13350
13351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13352 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13353 aarch64_set_next_PC_by_offset (cpu, offset);
13354 }
13355
13356 /* 64 bit branch on register non-zero. */
13357 static void
13358 cbnz (sim_cpu *cpu, int32_t offset)
13359 {
13360 unsigned rt = INSTR (4, 0);
13361
13362 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13363 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13364 aarch64_set_next_PC_by_offset (cpu, offset);
13365 }
13366
13367 /* 32 bit branch on register zero. */
13368 static void
13369 cbz32 (sim_cpu *cpu, int32_t offset)
13370 {
13371 unsigned rt = INSTR (4, 0);
13372
13373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13374 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13375 aarch64_set_next_PC_by_offset (cpu, offset);
13376 }
13377
13378 /* 64 bit branch on register zero. */
13379 static void
13380 cbz (sim_cpu *cpu, int32_t offset)
13381 {
13382 unsigned rt = INSTR (4, 0);
13383
13384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13385 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13386 aarch64_set_next_PC_by_offset (cpu, offset);
13387 }
13388
13389 /* Branch on register bit test non-zero -- one size fits all. */
13390 static void
13391 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13392 {
13393 unsigned rt = INSTR (4, 0);
13394
13395 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13396 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13397 aarch64_set_next_PC_by_offset (cpu, offset);
13398 }
13399
13400 /* Branch on register bit test zero -- one size fits all. */
13401 static void
13402 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13403 {
13404 unsigned rt = INSTR (4, 0);
13405
13406 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13407 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13408 aarch64_set_next_PC_by_offset (cpu, offset);
13409 }
13410
13411 static void
13412 dexCompareBranchImmediate (sim_cpu *cpu)
13413 {
13414 /* instr[30,25] = 01 1010
13415 instr[31] = size : 0 ==> 32, 1 ==> 64
13416 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13417 instr[23,5] = simm19 branch offset counted in words
13418 instr[4,0] = rt */
13419
13420 uint32_t size = INSTR (31, 31);
13421 uint32_t op = INSTR (24, 24);
13422 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13423
13424 if (size == 0)
13425 {
13426 if (op == 0)
13427 cbz32 (cpu, offset);
13428 else
13429 cbnz32 (cpu, offset);
13430 }
13431 else
13432 {
13433 if (op == 0)
13434 cbz (cpu, offset);
13435 else
13436 cbnz (cpu, offset);
13437 }
13438 }
13439
13440 static void
13441 dexTestBranchImmediate (sim_cpu *cpu)
13442 {
13443 /* instr[31] = b5 : bit 5 of test bit idx
13444 instr[30,25] = 01 1011
13445 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13446 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13447 instr[18,5] = simm14 : signed offset counted in words
13448 instr[4,0] = uimm5 */
13449
13450 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13451 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13452
13453 NYI_assert (30, 25, 0x1b);
13454
13455 if (INSTR (24, 24) == 0)
13456 tbz (cpu, pos, offset);
13457 else
13458 tbnz (cpu, pos, offset);
13459 }
13460
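/* Example: "tbnz x3, #40, target" encodes b5 = 1 and b40 = 01000,
   so pos = (1 << 5) | 8 = 40 and the simulator tests bit 40 of X3.  */
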
13461 static void
13462 dexCondBranchImmediate (sim_cpu *cpu)
13463 {
13464 /* instr[31,25] = 010 1010
13465 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13466 instr[23,5] = simm19 : signed offset counted in words
13467 instr[4] = op0
13468 instr[3,0] = cond */
13469
13470 int32_t offset;
13471 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13472
13473 NYI_assert (31, 25, 0x2a);
13474
13475 if (op != 0)
13476 HALT_UNALLOC;
13477
13478 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13479
13480 bcc (cpu, offset, INSTR (3, 0));
13481 }
13482
13483 static void
13484 dexBranchRegister (sim_cpu *cpu)
13485 {
13486 /* instr[31,25] = 110 1011
13487 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13488 instr[20,16] = op2 : must be 11111
13489 instr[15,10] = op3 : must be 000000
13490 instr[4,0] = op4 : must be 00000. */
13491
13492 uint32_t op = INSTR (24, 21);
13493 uint32_t op2 = INSTR (20, 16);
13494 uint32_t op3 = INSTR (15, 10);
13495 uint32_t op4 = INSTR (4, 0);
13496
13497 NYI_assert (31, 25, 0x6b);
13498
13499 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13500 HALT_UNALLOC;
13501
13502 if (op == 0)
13503 br (cpu);
13504
13505 else if (op == 1)
13506 blr (cpu);
13507
13508 else if (op == 2)
13509 ret (cpu);
13510
13511 else
13512 {
13513 /* ERET and DRPS require 0b11111 in rn = instr [9,5];
13514 anything else is unallocated. */
13515 uint32_t rn = INSTR (9, 5);
13516
13517 if (rn != 0x1f)
13518 HALT_UNALLOC;
13519
13520 if (op == 4 || op == 5)
13521 HALT_NYI;
13522
13523 HALT_UNALLOC;
13524 }
13525 }
13526
13527 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13528 but this may not be available. So instead we define the values we need
13529 here. */
13530 #define AngelSVC_Reason_Open 0x01
13531 #define AngelSVC_Reason_Close 0x02
13532 #define AngelSVC_Reason_Write 0x05
13533 #define AngelSVC_Reason_Read 0x06
13534 #define AngelSVC_Reason_IsTTY 0x09
13535 #define AngelSVC_Reason_Seek 0x0A
13536 #define AngelSVC_Reason_FLen 0x0C
13537 #define AngelSVC_Reason_Remove 0x0E
13538 #define AngelSVC_Reason_Rename 0x0F
13539 #define AngelSVC_Reason_Clock 0x10
13540 #define AngelSVC_Reason_Time 0x11
13541 #define AngelSVC_Reason_System 0x12
13542 #define AngelSVC_Reason_Errno 0x13
13543 #define AngelSVC_Reason_GetCmdLine 0x15
13544 #define AngelSVC_Reason_HeapInfo 0x16
13545 #define AngelSVC_Reason_ReportException 0x18
13546 #define AngelSVC_Reason_Elapsed 0x30
13547
13548
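/* Sketch of the guest side of the protocol handled below (illustrative
   assembly, not part of the simulator): the reason code goes in w0, a
   pointer to the parameter block in x1, and the result comes back in x0.

	mov	w0, #0x05		// AngelSVC_Reason_Write
	adr	x1, param_block		// fd, buffer address, length
	hlt	#0xf000			// trap into handle_halt below  */
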
13549 static void
13550 handle_halt (sim_cpu *cpu, uint32_t val)
13551 {
13552 uint64_t result = 0;
13553
13554 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13555 if (val != 0xf000)
13556 {
13557 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13558 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13559 sim_stopped, SIM_SIGTRAP);
13560 }
13561
13562 /* We have encountered an Angel SVC call. See if we can process it. */
13563 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13564 {
13565 case AngelSVC_Reason_HeapInfo:
13566 {
13567 /* Get the values. */
13568 uint64_t stack_top = aarch64_get_stack_start (cpu);
13569 uint64_t heap_base = aarch64_get_heap_start (cpu);
13570
13571 /* Get the pointer */
13572 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13573 ptr = aarch64_get_mem_u64 (cpu, ptr);
13574
13575 /* Fill in the memory block. */
13576 /* Start addr of heap. */
13577 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13578 /* End addr of heap. */
13579 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13580 /* Lowest stack addr. */
13581 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13582 /* Initial stack addr. */
13583 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13584
13585 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13586 }
13587 break;
13588
13589 case AngelSVC_Reason_Open:
13590 {
13591 /* Get the pointer */
13592 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13593 /* FIXME: For now we just assume that we will only be asked
13594 to open the standard file descriptors. */
13595 static int fd = 0;
13596 result = fd ++;
13597
13598 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13599 }
13600 break;
13601
13602 case AngelSVC_Reason_Close:
13603 {
13604 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13605 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13606 result = 0;
13607 }
13608 break;
13609
13610 case AngelSVC_Reason_Errno:
13611 result = 0;
13612 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13613 break;
13614
13615 case AngelSVC_Reason_Clock:
13616 result =
13617 #ifdef CLOCKS_PER_SEC
13618 (CLOCKS_PER_SEC >= 100)
13619 ? (clock () / (CLOCKS_PER_SEC / 100))
13620 : ((clock () * 100) / CLOCKS_PER_SEC)
13621 #else
13622 /* Presume unix... clock() returns microseconds. */
13623 (clock () / 10000)
13624 #endif
13625 ;
13626 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13627 break;
13628
13629 case AngelSVC_Reason_GetCmdLine:
13630 {
13631 /* Get the pointer */
13632 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13633 ptr = aarch64_get_mem_u64 (cpu, ptr);
13634
13635 /* FIXME: No command line for now. */
13636 aarch64_set_mem_u64 (cpu, ptr, 0);
13637 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13638 }
13639 break;
13640
13641 case AngelSVC_Reason_IsTTY:
13642 result = 1;
13643 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13644 break;
13645
13646 case AngelSVC_Reason_Write:
13647 {
13648 /* Get the pointer */
13649 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13650 /* Get the write control block. */
13651 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13652 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13653 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13654
13655 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13656 PRIx64 " on descriptor %" PRIx64,
13657 len, buf, fd);
13658
13659 if (len > 1280)
13660 {
13661 TRACE_SYSCALL (cpu,
13662 " AngelSVC: Write: Suspiciously long write: %ld",
13663 (long) len);
13664 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13665 sim_stopped, SIM_SIGBUS);
13666 }
13667 else if (fd == 1)
13668 {
13669 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13670 }
13671 else if (fd == 2)
13672 {
13673 TRACE (cpu, 0, "\n");
13674 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13675 (int) len, aarch64_get_mem_ptr (cpu, buf));
13676 TRACE (cpu, 0, "\n");
13677 }
13678 else
13679 {
13680 TRACE_SYSCALL (cpu,
13681 " AngelSVC: Write: Unexpected file handle: %d",
13682 (int) fd);
13683 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13684 sim_stopped, SIM_SIGABRT);
13685 }
13686 }
13687 break;
13688
13689 case AngelSVC_Reason_ReportException:
13690 {
13691 /* Get the pointer */
13692 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13693 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13694 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13695 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13696
13697 TRACE_SYSCALL (cpu,
13698 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13699 type, state);
13700
13701 if (type == 0x20026)
13702 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13703 sim_exited, state);
13704 else
13705 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13706 sim_stopped, SIM_SIGINT);
13707 }
13708 break;
13709
13710 case AngelSVC_Reason_Read:
13711 case AngelSVC_Reason_FLen:
13712 case AngelSVC_Reason_Seek:
13713 case AngelSVC_Reason_Remove:
13714 case AngelSVC_Reason_Time:
13715 case AngelSVC_Reason_System:
13716 case AngelSVC_Reason_Rename:
13717 case AngelSVC_Reason_Elapsed:
13718 default:
13719 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13720 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13721 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13722 sim_stopped, SIM_SIGTRAP);
13723 }
13724
13725 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13726 }
13727
13728 static void
13729 dexExcpnGen (sim_cpu *cpu)
13730 {
13731 /* instr[31:24] = 11010100
13732 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13733 010 ==> HLT, 101 ==> DBG GEN EXCPN
13734 instr[20,5] = imm16
13735 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13736 instr[1,0] = LL : discriminates opc */
13737
13738 uint32_t opc = INSTR (23, 21);
13739 uint32_t imm16 = INSTR (20, 5);
13740 uint32_t opc2 = INSTR (4, 2);
13741 uint32_t LL;
13742
13743 NYI_assert (31, 24, 0xd4);
13744
13745 if (opc2 != 0)
13746 HALT_UNALLOC;
13747
13748 LL = INSTR (1, 0);
13749
13750 /* We only implement HLT and BRK for now. */
13751 if (opc == 1 && LL == 0)
13752 {
13753 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13754 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13755 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13756 }
13757
13758 if (opc == 2 && LL == 0)
13759 handle_halt (cpu, imm16);
13760
13761 else if (opc == 0 || opc == 5)
13762 HALT_NYI;
13763
13764 else
13765 HALT_UNALLOC;
13766 }
13767
13768 /* Stub for accessing system registers. */
13769
13770 static uint64_t
13771 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13772 unsigned crm, unsigned op2)
13773 {
13774 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13775 /* DCZID_EL0 - the Data Cache Zero ID register.
13776 We do not support DC ZVA at the moment, so
13777 we return a value with the disable bit set.
13778 We implement support for the DCZID register since
13779 it is used by the C library's memset function. */
13780 return ((uint64_t) 1) << 4;
13781
13782 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13783 /* Cache Type Register. */
13784 return 0x80008000UL;
13785
13786 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13787 /* TPIDR_EL0 - thread pointer id. */
13788 return aarch64_get_thread_id (cpu);
13789
13790 if (op1 == 3 && crm == 4 && op2 == 0)
13791 return aarch64_get_FPCR (cpu);
13792
13793 if (op1 == 3 && crm == 4 && op2 == 1)
13794 return aarch64_get_FPSR (cpu);
13795
13796 else if (op1 == 3 && crm == 2 && op2 == 0)
13797 return aarch64_get_CPSR (cpu);
13798
13799 HALT_NYI;
13800 }
13801
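/* For example, the "mrs x0, dczid_el0" issued by the C library's
   memset reaches the first case above (op1 == 3, CRn == 0000,
   CRm == 0000, op2 == 111) and sees DZP, bit 4, set -- DC ZVA
   prohibited.  */
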
13802 static void
13803 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13804 unsigned crm, unsigned op2, uint64_t val)
13805 {
13806 if (op1 == 3 && crm == 4 && op2 == 0)
13807 aarch64_set_FPCR (cpu, val);
13808
13809 else if (op1 == 3 && crm == 4 && op2 == 1)
13810 aarch64_set_FPSR (cpu, val);
13811
13812 else if (op1 == 3 && crm == 2 && op2 == 0)
13813 aarch64_set_CPSR (cpu, val);
13814
13815 else
13816 HALT_NYI;
13817 }
13818
13819 static void
13820 do_mrs (sim_cpu *cpu)
13821 {
13822 /* instr[31:20] = 1101 0101 0001 1
13823 instr[19] = op0
13824 instr[18,16] = op1
13825 instr[15,12] = CRn
13826 instr[11,8] = CRm
13827 instr[7,5] = op2
13828 instr[4,0] = Rt */
13829 unsigned sys_op0 = INSTR (19, 19) + 2;
13830 unsigned sys_op1 = INSTR (18, 16);
13831 unsigned sys_crn = INSTR (15, 12);
13832 unsigned sys_crm = INSTR (11, 8);
13833 unsigned sys_op2 = INSTR (7, 5);
13834 unsigned rt = INSTR (4, 0);
13835
13836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13837 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13838 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13839 }
13840
13841 static void
13842 do_MSR_immediate (sim_cpu *cpu)
13843 {
13844 /* instr[31:19] = 1101 0101 0000 0
13845 instr[18,16] = op1
13846 instr[15,12] = 0100
13847 instr[11,8] = CRm
13848 instr[7,5] = op2
13849 instr[4,0] = 1 1111 */
13850
13851 unsigned op1 = INSTR (18, 16);
13852 /*unsigned crm = INSTR (11, 8);*/
13853 unsigned op2 = INSTR (7, 5);
13854
13855 NYI_assert (31, 19, 0x1AA0);
13856 NYI_assert (15, 12, 0x4);
13857 NYI_assert (4, 0, 0x1F);
13858
13859 if (op1 == 0)
13860 {
13861 if (op2 == 5)
13862 HALT_NYI; /* set SPSel. */
13863 else
13864 HALT_UNALLOC;
13865 }
13866 else if (op1 == 3)
13867 {
13868 if (op2 == 6)
13869 HALT_NYI; /* set DAIFset. */
13870 else if (op2 == 7)
13871 HALT_NYI; /* set DAIFclr. */
13872 else
13873 HALT_UNALLOC;
13874 }
13875 else
13876 HALT_UNALLOC;
13877 }
13878
13879 static void
13880 do_MSR_reg (sim_cpu *cpu)
13881 {
13882 /* instr[31:20] = 1101 0101 0001
13883 instr[19] = op0
13884 instr[18,16] = op1
13885 instr[15,12] = CRn
13886 instr[11,8] = CRm
13887 instr[7,5] = op2
13888 instr[4,0] = Rt */
13889
13890 unsigned sys_op0 = INSTR (19, 19) + 2;
13891 unsigned sys_op1 = INSTR (18, 16);
13892 unsigned sys_crn = INSTR (15, 12);
13893 unsigned sys_crm = INSTR (11, 8);
13894 unsigned sys_op2 = INSTR (7, 5);
13895 unsigned rt = INSTR (4, 0);
13896
13897 NYI_assert (31, 20, 0xD51);
13898
13899 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13900 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13901 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13902 }
13903
13904 static void
13905 do_SYS (sim_cpu *cpu)
13906 {
13907 /* instr[31,19] = 1101 0101 0000 1
13908 instr[18,16] = op1
13909 instr[15,12] = CRn
13910 instr[11,8] = CRm
13911 instr[7,5] = op2
13912 instr[4,0] = Rt */
13913 NYI_assert (31, 19, 0x1AA1);
13914
13915 /* FIXME: For now we just silently accept system ops. */
13916 }
13917
13918 static void
13919 dexSystem (sim_cpu *cpu)
13920 {
13921 /* instr[31:22] = 1101 01010 0
13922 instr[21] = L
13923 instr[20,19] = op0
13924 instr[18,16] = op1
13925 instr[15,12] = CRn
13926 instr[11,8] = CRm
13927 instr[7,5] = op2
13928 instr[4,0] = uimm5 */
13929
13930 /* We are interested in HINT, DSB, DMB and ISB.
13931
13932 Hint #0 encodes NOOP (this is the only hint we care about):
13933 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111; we execute
13934 as a NOP any hint with CRm != 0000, op2 == 000, or op2 > 101.
13935
13936 DSB, DMB, ISB are data synchronization barrier, data memory
13937 barrier and instruction synchronization barrier, respectively, where
13938
13939 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13940 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13941 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13942 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13943 10 ==> InnerShareable, 11 ==> FullSystem
13944 types : 01 ==> Reads, 10 ==> Writes,
13945 11 ==> All, 00 ==> All (domain == FullSystem). */
13946
13947 unsigned rt = INSTR (4, 0);
13948
13949 NYI_assert (31, 22, 0x354);
13950
13951 switch (INSTR (21, 12))
13952 {
13953 case 0x032:
13954 if (rt == 0x1F)
13955 {
13956 /* We execute as a NOP when CRm != 0000, or when
13957 CRm == 0000 and (op2 == 000 or op2 > 101). */
13958 uint32_t crm = INSTR (11, 8);
13959 uint32_t op2 = INSTR (7, 5);
13960
13961 if (crm != 0 || (op2 == 0 || op2 > 5))
13962 {
13963 /* Actually call nop method so we can reimplement it later. */
13964 nop (cpu);
13965 return;
13966 }
13967 }
13968 HALT_NYI;
13969
13970 case 0x033:
13971 {
13972 uint32_t op2 = INSTR (7, 5);
13973
13974 switch (op2)
13975 {
13976 case 2: HALT_NYI;
13977 case 4: dsb (cpu); return;
13978 case 5: dmb (cpu); return;
13979 case 6: isb (cpu); return;
13980 default: HALT_UNALLOC;
13981 }
13982 }
13983
13984 case 0x3B0:
13985 case 0x3B4:
13986 case 0x3BD:
13987 do_mrs (cpu);
13988 return;
13989
13990 case 0x0B7:
13991 do_SYS (cpu); /* DC is an alias of SYS. */
13992 return;
13993
13994 default:
13995 if (INSTR (21, 20) == 0x1)
13996 do_MSR_reg (cpu);
13997 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13998 do_MSR_immediate (cpu);
13999 else
14000 HALT_NYI;
14001 return;
14002 }
14003 }
14004
14005 static void
14006 dexBr (sim_cpu *cpu)
14007 {
14008 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14009 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14010 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14011 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14012
14013 switch (group2)
14014 {
14015 case BR_IMM_000:
14016 return dexBranchImmediate (cpu);
14017
14018 case BR_IMMCMP_001:
14019 /* Compare has bit 25 clear while test has it set. */
14020 if (!INSTR (25, 25))
14021 dexCompareBranchImmediate (cpu);
14022 else
14023 dexTestBranchImmediate (cpu);
14024 return;
14025
14026 case BR_IMMCOND_010:
14027 /* This is a conditional branch if bit 25 is clear otherwise
14028 unallocated. */
14029 if (!INSTR (25, 25))
14030 dexCondBranchImmediate (cpu);
14031 else
14032 HALT_UNALLOC;
14033 return;
14034
14035 case BR_UNALLOC_011:
14036 HALT_UNALLOC;
14037
14038 case BR_IMM_100:
14039 dexBranchImmediate (cpu);
14040 return;
14041
14042 case BR_IMMCMP_101:
14043 /* Compare has bit 25 clear while test has it set. */
14044 if (!INSTR (25, 25))
14045 dexCompareBranchImmediate (cpu);
14046 else
14047 dexTestBranchImmediate (cpu);
14048 return;
14049
14050 case BR_REG_110:
14051 /* Unconditional branch reg has bit 25 set. */
14052 if (INSTR (25, 25))
14053 dexBranchRegister (cpu);
14054
14055 /* This includes Excpn Gen, System and unalloc operations.
14056 We need to decode the Excpn Gen operation BRK so we can plant
14057 debugger entry points.
14058 Excpn Gen operations have instr [24] = 0.
14059 We need to decode at least one of the System operations NOP
14060 which is an alias for HINT #0.
14061 System operations have instr [24,22] = 100. */
14062 else if (INSTR (24, 24) == 0)
14063 dexExcpnGen (cpu);
14064
14065 else if (INSTR (24, 22) == 4)
14066 dexSystem (cpu);
14067
14068 else
14069 HALT_UNALLOC;
14070
14071 return;
14072
14073 case BR_UNALLOC_111:
14074 HALT_UNALLOC;
14075
14076 default:
14077 /* Should never reach here. */
14078 HALT_NYI;
14079 }
14080 }
14081
14082 static void
14083 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14084 {
14085 /* We need to check if gdb wants a break in here. */
14086 /* checkBreak (cpu);. */
14087
14088 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14089
14090 switch (group)
14091 {
14092 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14093 case GROUP_LDST_0100: dexLdSt (cpu); break;
14094 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14095 case GROUP_LDST_0110: dexLdSt (cpu); break;
14096 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14097 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14098 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14099 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14100 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14101 case GROUP_LDST_1100: dexLdSt (cpu); break;
14102 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14103 case GROUP_LDST_1110: dexLdSt (cpu); break;
14104 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14105
14106 case GROUP_UNALLOC_0001:
14107 case GROUP_UNALLOC_0010:
14108 case GROUP_UNALLOC_0011:
14109 HALT_UNALLOC;
14110
14111 default:
14112 /* Should never reach here. */
14113 HALT_NYI;
14114 }
14115 }
14116
14117 static bfd_boolean
14118 aarch64_step (sim_cpu *cpu)
14119 {
14120 uint64_t pc = aarch64_get_PC (cpu);
14121
14122 if (pc == TOP_LEVEL_RETURN_PC)
14123 return FALSE;
14124
14125 aarch64_set_next_PC (cpu, pc + 4);
14126
14127 /* Code is always little-endian. */
14128 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14129 & aarch64_get_instr (cpu), pc, 4);
14130 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14131
14132 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14133 aarch64_get_instr (cpu));
14134 TRACE_DISASM (cpu, pc);
14135
14136 aarch64_decode_and_execute (cpu, pc);
14137
14138 return TRUE;
14139 }
14140
14141 void
14142 aarch64_run (SIM_DESC sd)
14143 {
14144 sim_cpu *cpu = STATE_CPU (sd, 0);
14145
14146 while (aarch64_step (cpu))
14147 {
14148 aarch64_update_PC (cpu);
14149
14150 if (sim_events_tick (sd))
14151 sim_events_process (sd);
14152 }
14153
14154 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14155 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14156 }
14157
14158 void
14159 aarch64_init (sim_cpu *cpu, uint64_t pc)
14160 {
14161 uint64_t sp = aarch64_get_stack_start (cpu);
14162
14163 /* Install SP, FP and PC and set LR to TOP_LEVEL_RETURN_PC
14164 so we can detect a top-level return. */
14165 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14166 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14167 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14168 aarch64_set_next_PC (cpu, pc);
14169 aarch64_update_PC (cpu);
14170 aarch64_init_LIT_table ();
14171 }