1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2017 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <sys/types.h>
27 #include <math.h>
28 #include <time.h>
29 #include <limits.h>
30
31 #include "simulator.h"
32 #include "cpustate.h"
33 #include "memory.h"
34
35 #define NO_SP 0
36 #define SP_OK 1
37
38 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
39 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
40 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
41
42 /* Space saver macro. */
43 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
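/* For example, INSTR (4, 0) extracts the five low order bits, which
   hold the Rt/Rd register number in most of the encodings handled
   below.  */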
44
45 #define HALT_UNALLOC \
46 do \
47 { \
48 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
49 TRACE_INSN (cpu, \
50 "Unallocated instruction detected at sim line %d," \
51 " exe addr %" PRIx64, \
52 __LINE__, aarch64_get_PC (cpu)); \
53 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
54 sim_stopped, SIM_SIGILL); \
55 } \
56 while (0)
57
58 #define HALT_NYI \
59 do \
60 { \
61 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
62 TRACE_INSN (cpu, \
63 "Unimplemented instruction detected at sim line %d," \
64 " exe addr %" PRIx64, \
65 __LINE__, aarch64_get_PC (cpu)); \
66 if (! TRACE_ANY_P (cpu)) \
67 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
68 aarch64_get_instr (cpu)); \
69 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
70 sim_stopped, SIM_SIGABRT); \
71 } \
72 while (0)
73
74 #define NYI_assert(HI, LO, EXPECTED) \
75 do \
76 { \
77 if (INSTR ((HI), (LO)) != (EXPECTED)) \
78 HALT_NYI; \
79 } \
80 while (0)
81
82 /* Helper functions used by expandLogicalImmediate. */
83
84 /* Return a mask with the low N bits set and all other bits zero. */
85 static inline uint64_t
86 ones (int N)
87 {
88 return (N == 64 ? ~(uint64_t) 0 : ((1ULL << N) - 1));
89 }
90
91 /* Return bit N of VAL, shifted down into bit 0. */
92 static inline uint64_t
93 pickbit (uint64_t val, int N)
94 {
95 return pickbits64 (val, N, N);
96 }
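/* For example, pickbit (0x4, 2) yields 1 while pickbit (0x4, 1)
   yields 0.  */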
97
98 static uint64_t
99 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
100 {
101 uint64_t mask;
102 uint64_t imm;
103 unsigned simd_size;
104
105 /* The immediate value consists of S+1 bits set to 1, left rotated
106 by simd_size - R (in other words, right rotated by R), then replicated. */
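/* For example: N = 0, S = 0x01, R = 1 selects simd_size = 32;
   S+1 = 2 set bits give imm = 0b11; rotating right by 1 gives
   0x80000001, which replicates to 0x8000000180000001.  */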
107 if (N != 0)
108 {
109 simd_size = 64;
110 mask = 0xffffffffffffffffull;
111 }
112 else
113 {
114 switch (S)
115 {
116 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
117 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
118 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
119 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
120 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
121 default: return 0;
122 }
123 mask = (1ull << simd_size) - 1;
124 /* Top bits are IGNORED. */
125 R &= simd_size - 1;
126 }
127
128 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
129 if (S == simd_size - 1)
130 return 0;
131
132 /* S+1 consecutive bits to 1. */
133 /* NOTE: S can't be 63 due to detection above. */
134 imm = (1ull << (S + 1)) - 1;
135
136 /* Rotate to the left by simd_size - R. */
137 if (R != 0)
138 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
139
140 /* Replicate the value according to SIMD size. */
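/* N.B. each case deliberately falls through to the one below,
   doubling the pattern until all 64 bits are populated.  */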
141 switch (simd_size)
142 {
143 case 2: imm = (imm << 2) | imm;
144 case 4: imm = (imm << 4) | imm;
145 case 8: imm = (imm << 8) | imm;
146 case 16: imm = (imm << 16) | imm;
147 case 32: imm = (imm << 32) | imm;
148 case 64: break;
149 default: return 0;
150 }
151
152 return imm;
153 }
154
155 /* Instr[22,10] encodes N, immr and imms. We want a lookup table
156 entry for each possible combination, i.e. 13 bits worth of entries. */
157 #define LI_TABLE_SIZE (1 << 13)
158 static uint64_t LITable[LI_TABLE_SIZE];
159
160 void
161 aarch64_init_LIT_table (void)
162 {
163 unsigned index;
164
165 for (index = 0; index < LI_TABLE_SIZE; index++)
166 {
167 uint32_t N = uimm (index, 12, 12);
168 uint32_t immr = uimm (index, 11, 6);
169 uint32_t imms = uimm (index, 5, 0);
170
171 LITable [index] = expand_logical_immediate (imms, immr, N);
172 }
173 }
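/* N.B. a zero entry in LITable marks an unallocated immediate
   encoding, since expand_logical_immediate returns 0 for every
   invalid combination of N, immr and imms.  */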
174
175 static void
176 dexNotify (sim_cpu *cpu)
177 {
178 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
179 2 ==> exit Java, 3 ==> start next bytecode. */
180 uint32_t type = INSTR (14, 0);
181
182 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
183
184 switch (type)
185 {
186 case 0:
187 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
189 break;
190 case 1:
191 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
192 aarch64_get_reg_u64 (cpu, R22, 0)); */
193 break;
194 case 2:
195 /* aarch64_notifyMethodExit (); */
196 break;
197 case 3:
198 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
199 aarch64_get_reg_u64 (cpu, R22, 0)); */
200 break;
201 }
202 }
203
204 /* Secondary decode within top-level groups. */
205
206 static void
207 dexPseudo (sim_cpu *cpu)
208 {
209 /* assert instr[28,27] = 00
210
211 We provide 2 pseudo instructions:
212
213 HALT stops execution of the simulator causing an immediate
214 return to the x86 code which entered it.
215
216 CALLOUT initiates recursive entry into x86 code. A register
217 argument holds the address of the x86 routine. Immediate
218 values in the instruction identify the number of general
219 purpose and floating point register arguments to be passed
220 and the type of any value to be returned. */
221
222 uint32_t PSEUDO_HALT = 0xE0000000U;
223 uint32_t PSEUDO_CALLOUT = 0x00018000U;
224 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
225 uint32_t PSEUDO_NOTIFY = 0x00014000U;
226 uint32_t dispatch;
227
228 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
229 {
230 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
231 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
232 sim_stopped, SIM_SIGTRAP);
233 }
234
235 dispatch = INSTR (31, 15);
236
237 /* We do not handle callouts at the moment. */
238 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
239 {
240 TRACE_EVENTS (cpu, " Callout");
241 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
242 sim_stopped, SIM_SIGABRT);
243 }
244
245 else if (dispatch == PSEUDO_NOTIFY)
246 dexNotify (cpu);
247
248 else
249 HALT_UNALLOC;
250 }
251
252 /* Load-store single register (unscaled offset)
253 These instructions employ a base register plus an unscaled signed
254 9 bit offset.
255
256 N.B. the base register (source) can be Xn or SP. All other
257 registers may not be SP. */
258
259 /* 32 bit load 32 bit unscaled signed 9 bit. */
260 static void
261 ldur32 (sim_cpu *cpu, int32_t offset)
262 {
263 unsigned rn = INSTR (9, 5);
264 unsigned rt = INSTR (4, 0);
265
266 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
267 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
268 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
269 + offset));
270 }
271
272 /* 64 bit load 64 bit unscaled signed 9 bit. */
273 static void
274 ldur64 (sim_cpu *cpu, int32_t offset)
275 {
276 unsigned rn = INSTR (9, 5);
277 unsigned rt = INSTR (4, 0);
278
279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
280 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
281 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
282 + offset));
283 }
284
285 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
286 static void
287 ldurb32 (sim_cpu *cpu, int32_t offset)
288 {
289 unsigned rn = INSTR (9, 5);
290 unsigned rt = INSTR (4, 0);
291
292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
293 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
294 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
295 + offset));
296 }
297
298 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
299 static void
300 ldursb32 (sim_cpu *cpu, int32_t offset)
301 {
302 unsigned rn = INSTR (9, 5);
303 unsigned rt = INSTR (4, 0);
304
305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
306 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
307 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
308 + offset));
309 }
310
311 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
312 static void
313 ldursb64 (sim_cpu *cpu, int32_t offset)
314 {
315 unsigned rn = INSTR (9, 5);
316 unsigned rt = INSTR (4, 0);
317
318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
319 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
320 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
321 + offset));
322 }
323
324 /* 32 bit load zero-extended short unscaled signed 9 bit */
325 static void
326 ldurh32 (sim_cpu *cpu, int32_t offset)
327 {
328 unsigned rn = INSTR (9, 5);
329 unsigned rd = INSTR (4, 0);
330
331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
332 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
333 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
334 + offset));
335 }
336
337 /* 32 bit load sign-extended short unscaled signed 9 bit */
338 static void
339 ldursh32 (sim_cpu *cpu, int32_t offset)
340 {
341 unsigned rn = INSTR (9, 5);
342 unsigned rd = INSTR (4, 0);
343
344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
345 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
346 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
347 + offset));
348 }
349
350 /* 64 bit load sign-extended short unscaled signed 9 bit */
351 static void
352 ldursh64 (sim_cpu *cpu, int32_t offset)
353 {
354 unsigned rn = INSTR (9, 5);
355 unsigned rt = INSTR (4, 0);
356
357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
358 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
359 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
360 + offset));
361 }
362
363 /* 64 bit load sign-extended word unscaled signed 9 bit */
364 static void
365 ldursw (sim_cpu *cpu, int32_t offset)
366 {
367 unsigned rn = INSTR (9, 5);
368 unsigned rd = INSTR (4, 0);
369
370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
371 aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
372 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
373 + offset));
374 }
375
376 /* N.B. with stores the value in source is written to the address
377 identified by source2 modified by offset. */
378
379 /* 32 bit store 32 bit unscaled signed 9 bit. */
380 static void
381 stur32 (sim_cpu *cpu, int32_t offset)
382 {
383 unsigned rn = INSTR (9, 5);
384 unsigned rd = INSTR (4, 0);
385
386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
387 aarch64_set_mem_u32 (cpu,
388 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
389 aarch64_get_reg_u32 (cpu, rd, NO_SP));
390 }
391
392 /* 64 bit store 64 bit unscaled signed 9 bit */
393 static void
394 stur64 (sim_cpu *cpu, int32_t offset)
395 {
396 unsigned rn = INSTR (9, 5);
397 unsigned rd = INSTR (4, 0);
398
399 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
400 aarch64_set_mem_u64 (cpu,
401 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
402 aarch64_get_reg_u64 (cpu, rd, NO_SP));
403 }
404
405 /* 32 bit store byte unscaled signed 9 bit */
406 static void
407 sturb (sim_cpu *cpu, int32_t offset)
408 {
409 unsigned rn = INSTR (9, 5);
410 unsigned rd = INSTR (4, 0);
411
412 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
413 aarch64_set_mem_u8 (cpu,
414 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
415 aarch64_get_reg_u8 (cpu, rd, NO_SP));
416 }
417
418 /* 32 bit store short unscaled signed 9 bit */
419 static void
420 sturh (sim_cpu *cpu, int32_t offset)
421 {
422 unsigned rn = INSTR (9, 5);
423 unsigned rd = INSTR (4, 0);
424
425 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
426 aarch64_set_mem_u16 (cpu,
427 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
428 aarch64_get_reg_u16 (cpu, rd, NO_SP));
429 }
430
431 /* Load single register pc-relative label
432 Offset is a signed 19 bit immediate count in words.
433 Rt may not be SP. */
434
435 /* 32 bit pc-relative load */
436 static void
437 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
438 {
439 unsigned rd = INSTR (4, 0);
440
441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
442 aarch64_set_reg_u64 (cpu, rd, NO_SP,
443 aarch64_get_mem_u32
444 (cpu, aarch64_get_PC (cpu) + offset * 4));
445 }
446
447 /* 64 bit pc-relative load */
448 static void
449 ldr_pcrel (sim_cpu *cpu, int32_t offset)
450 {
451 unsigned rd = INSTR (4, 0);
452
453 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
454 aarch64_set_reg_u64 (cpu, rd, NO_SP,
455 aarch64_get_mem_u64
456 (cpu, aarch64_get_PC (cpu) + offset * 4));
457 }
458
459 /* sign extended 32 bit pc-relative load */
460 static void
461 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
462 {
463 unsigned rd = INSTR (4, 0);
464
465 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
466 aarch64_set_reg_u64 (cpu, rd, NO_SP,
467 aarch64_get_mem_s32
468 (cpu, aarch64_get_PC (cpu) + offset * 4));
469 }
470
471 /* float pc-relative load */
472 static void
473 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
474 {
475 unsigned int rd = INSTR (4, 0);
476
477 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
478 aarch64_set_vec_u32 (cpu, rd, 0,
479 aarch64_get_mem_u32
480 (cpu, aarch64_get_PC (cpu) + offset * 4));
481 }
482
483 /* double pc-relative load */
484 static void
485 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
486 {
487 unsigned int st = INSTR (4, 0);
488
489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
490 aarch64_set_vec_u64 (cpu, st, 0,
491 aarch64_get_mem_u64
492 (cpu, aarch64_get_PC (cpu) + offset * 4));
493 }
494
495 /* long double pc-relative load. */
496 static void
497 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
498 {
499 unsigned int st = INSTR (4, 0);
500 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
501 FRegister a;
502
503 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
504 aarch64_get_mem_long_double (cpu, addr, & a);
505 aarch64_set_FP_long_double (cpu, st, a);
506 }
507
508 /* This can be used to scale an offset by applying
509 the requisite shift. The second argument is either
510 16, 32, 64 or 128. */
511
512 #define SCALE(_offset, _elementSize) \
513 ((_offset) << ScaleShift ## _elementSize)
514
515 /* This can be used to optionally scale a register derived offset
516 by applying the requisite shift as indicated by the Scaling
517 argument. The second argument is the element size in bits,
518 one of 16, 32, 64 or 128. The third argument is either Scaled
519 or Unscaled. N.B. when _Scaling is Scaled the offset is shifted
520 left by the requisite scale shift; when it is Unscaled no shift occurs. */
521
522 #define OPT_SCALE(_offset, _elementType, _Scaling) \
523 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
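/* For example, assuming ScaleShift32 is 2 (log2 of a 4 byte element),
   OPT_SCALE (ix, 32, Scaled) yields ix << 2 while
   OPT_SCALE (ix, 32, Unscaled) yields ix unchanged.  */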
524
525 /* This can be used to zero or sign extend a 32 bit register derived
526 value to a 64 bit value. The first argument must be the value as
527 a uint32_t and the second must be either UXTW or SXTW. The result
528 is returned as an int64_t. */
529
530 static inline int64_t
531 extend (uint32_t value, Extension extension)
532 {
533 union
534 {
535 uint32_t u;
536 int32_t n;
537 } x;
538
539 /* A branchless variant of this ought to be possible. */
540 if (extension == UXTW || extension == NoExtension)
541 return value;
542
543 x.u = value;
544 return x.n;
545 }
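/* For example, extend (0x80000000, SXTW) yields 0xffffffff80000000
   while extend (0x80000000, UXTW) yields 0x0000000080000000.  */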
546
547 /* Scalar Floating Point
548
549 FP load/store single register (4 addressing modes)
550
551 N.B. the base register (source) can be the stack pointer.
552 The secondary source register (source2) can only be an Xn register. */
553
554 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
555 static void
556 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
557 {
558 unsigned rn = INSTR (9, 5);
559 unsigned st = INSTR (4, 0);
560 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
561
562 if (wb != Post)
563 address += offset;
564
565 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
566 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
567 if (wb == Post)
568 address += offset;
569
570 if (wb != NoWriteBack)
571 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
572 }
573
574 /* Load 8 bit with unsigned 12 bit offset. */
575 static void
576 fldrb_abs (sim_cpu *cpu, uint32_t offset)
577 {
578 unsigned rd = INSTR (4, 0);
579 unsigned rn = INSTR (9, 5);
580 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
581
582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
583 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
584 }
585
586 /* Load 16 bit scaled unsigned 12 bit. */
587 static void
588 fldrh_abs (sim_cpu *cpu, uint32_t offset)
589 {
590 unsigned rd = INSTR (4, 0);
591 unsigned rn = INSTR (9, 5);
592 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
593
594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
595 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
596 }
597
598 /* Load 32 bit scaled unsigned 12 bit. */
599 static void
600 fldrs_abs (sim_cpu *cpu, uint32_t offset)
601 {
602 unsigned rd = INSTR (4, 0);
603 unsigned rn = INSTR (9, 5);
604 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
605
606 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
607 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
608 }
609
610 /* Load 64 bit scaled unsigned 12 bit. */
611 static void
612 fldrd_abs (sim_cpu *cpu, uint32_t offset)
613 {
614 unsigned rd = INSTR (4, 0);
615 unsigned rn = INSTR (9, 5);
616 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
617
618 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
619 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
620 }
621
622 /* Load 128 bit scaled unsigned 12 bit. */
623 static void
624 fldrq_abs (sim_cpu *cpu, uint32_t offset)
625 {
626 unsigned rd = INSTR (4, 0);
627 unsigned rn = INSTR (9, 5);
628 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
629
630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
631 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
632 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
633 }
634
635 /* Load 32 bit scaled or unscaled zero- or sign-extended
636 32-bit register offset. */
637 static void
638 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
639 {
640 unsigned rm = INSTR (20, 16);
641 unsigned rn = INSTR (9, 5);
642 unsigned st = INSTR (4, 0);
643 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
644 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
645 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
646
647 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
648 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
649 (cpu, address + displacement));
650 }
651
652 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
653 static void
654 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
655 {
656 unsigned rn = INSTR (9, 5);
657 unsigned st = INSTR (4, 0);
658 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
659
660 if (wb != Post)
661 address += offset;
662
663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
664 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
665
666 if (wb == Post)
667 address += offset;
668
669 if (wb != NoWriteBack)
670 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
671 }
672
673 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
674 static void
675 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
676 {
677 unsigned rm = INSTR (20, 16);
678 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
679 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
680
681 fldrd_wb (cpu, displacement, NoWriteBack);
682 }
683
684 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
685 static void
686 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
687 {
688 FRegister a;
689 unsigned rn = INSTR (9, 5);
690 unsigned st = INSTR (4, 0);
691 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
692
693 if (wb != Post)
694 address += offset;
695
696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
697 aarch64_get_mem_long_double (cpu, address, & a);
698 aarch64_set_FP_long_double (cpu, st, a);
699
700 if (wb == Post)
701 address += offset;
702
703 if (wb != NoWriteBack)
704 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
705 }
706
707 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
708 static void
709 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
710 {
711 unsigned rm = INSTR (20, 16);
712 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
713 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
714
715 fldrq_wb (cpu, displacement, NoWriteBack);
716 }
717
718 /* Memory Access
719
720 load-store single register
721 There are four addressing modes available here which all employ a
722 64 bit source (base) register.
723
724 N.B. the base register (source) can be the stack pointer.
725 The secondary source register (source2) can only be an Xn register.
726
727 Scaled, 12-bit, unsigned immediate offset, without pre- and
728 post-index options.
729 Unscaled, 9-bit, signed immediate offset with pre- or post-index
730 writeback.
731 Scaled or unscaled 64-bit register offset.
732 Scaled or unscaled 32-bit extended register offset.
733
734 All offsets are assumed to be raw from the decode, i.e. the
735 simulator is expected to adjust scaled offsets based on the
736 accessed data size. With register or extended register offset
737 versions the same applies, except that in the latter case the
738 operation may also require a sign extend.
739
740 A separate method is provided for each possible addressing mode. */
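/* For the 32 bit integer loads, for instance, these modes are
   implemented by ldr32_abs, ldr32_wb (pre- and post-index) and
   ldr32_scale_ext (register and extended register offsets) below;
   the other access widths follow the same naming pattern.  */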
741
742 /* 32 bit load 32 bit scaled unsigned 12 bit */
743 static void
744 ldr32_abs (sim_cpu *cpu, uint32_t offset)
745 {
746 unsigned rn = INSTR (9, 5);
747 unsigned rt = INSTR (4, 0);
748
749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
750 /* The target register may not be SP but the source may be. */
751 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
752 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
753 + SCALE (offset, 32)));
754 }
755
756 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
757 static void
758 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
759 {
760 unsigned rn = INSTR (9, 5);
761 unsigned rt = INSTR (4, 0);
762 uint64_t address;
763
764 if (rn == rt && wb != NoWriteBack)
765 HALT_UNALLOC;
766
767 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
768
769 if (wb != Post)
770 address += offset;
771
772 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
773 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
774
775 if (wb == Post)
776 address += offset;
777
778 if (wb != NoWriteBack)
779 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
780 }
781
782 /* 32 bit load 32 bit scaled or unscaled
783 zero- or sign-extended 32-bit register offset */
784 static void
785 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
786 {
787 unsigned rm = INSTR (20, 16);
788 unsigned rn = INSTR (9, 5);
789 unsigned rt = INSTR (4, 0);
790 /* rn may reference SP, rm and rt must reference ZR */
791
792 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
793 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
794 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
795
796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
797 aarch64_set_reg_u64 (cpu, rt, NO_SP,
798 aarch64_get_mem_u32 (cpu, address + displacement));
799 }
800
801 /* 64 bit load 64 bit scaled unsigned 12 bit */
802 static void
803 ldr_abs (sim_cpu *cpu, uint32_t offset)
804 {
805 unsigned rn = INSTR (9, 5);
806 unsigned rt = INSTR (4, 0);
807
808 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
809 /* The target register may not be SP but the source may be. */
810 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
811 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
812 + SCALE (offset, 64)));
813 }
814
815 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
816 static void
817 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
818 {
819 unsigned rn = INSTR (9, 5);
820 unsigned rt = INSTR (4, 0);
821 uint64_t address;
822
823 if (rn == rt && wb != NoWriteBack)
824 HALT_UNALLOC;
825
826 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
827
828 if (wb != Post)
829 address += offset;
830
831 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
832 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
833
834 if (wb == Post)
835 address += offset;
836
837 if (wb != NoWriteBack)
838 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
839 }
840
841 /* 64 bit load 64 bit scaled or unscaled zero-
842 or sign-extended 32-bit register offset. */
843 static void
844 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
845 {
846 unsigned rm = INSTR (20, 16);
847 unsigned rn = INSTR (9, 5);
848 unsigned rt = INSTR (4, 0);
849 /* rn may reference SP, rm and rt must reference ZR */
850
851 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
852 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
853 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
854
855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
856 aarch64_set_reg_u64 (cpu, rt, NO_SP,
857 aarch64_get_mem_u64 (cpu, address + displacement));
858 }
859
860 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
861 static void
862 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
863 {
864 unsigned rn = INSTR (9, 5);
865 unsigned rt = INSTR (4, 0);
866
867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
868 /* The target register may not be SP but the source may be;
869 there is no scaling required for a byte load. */
870 aarch64_set_reg_u64 (cpu, rt, NO_SP,
871 aarch64_get_mem_u8
872 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
873 }
874
875 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
876 static void
877 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
878 {
879 unsigned rn = INSTR (9, 5);
880 unsigned rt = INSTR (4, 0);
881 uint64_t address;
882
883 if (rn == rt && wb != NoWriteBack)
884 HALT_UNALLOC;
885
886 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
887
888 if (wb != Post)
889 address += offset;
890
891 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
892 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
893
894 if (wb == Post)
895 address += offset;
896
897 if (wb != NoWriteBack)
898 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
899 }
900
901 /* 32 bit load zero-extended byte scaled or unscaled zero-
902 or sign-extended 32-bit register offset. */
903 static void
904 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
905 {
906 unsigned rm = INSTR (20, 16);
907 unsigned rn = INSTR (9, 5);
908 unsigned rt = INSTR (4, 0);
909 /* rn may reference SP, rm and rt must reference ZR */
910
911 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
912 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
913 extension);
914
915 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
916 /* There is no scaling required for a byte load. */
917 aarch64_set_reg_u64 (cpu, rt, NO_SP,
918 aarch64_get_mem_u8 (cpu, address + displacement));
919 }
920
921 /* 64 bit load sign-extended byte unscaled signed 9 bit
922 with pre- or post-writeback. */
923 static void
924 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
925 {
926 unsigned rn = INSTR (9, 5);
927 unsigned rt = INSTR (4, 0);
928 uint64_t address;
929 int64_t val;
930
931 if (rn == rt && wb != NoWriteBack)
932 HALT_UNALLOC;
933
934 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
935
936 if (wb != Post)
937 address += offset;
938
939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
940 val = aarch64_get_mem_s8 (cpu, address);
941 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
942
943 if (wb == Post)
944 address += offset;
945
946 if (wb != NoWriteBack)
947 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
948 }
949
950 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
951 static void
952 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
953 {
954 ldrsb_wb (cpu, offset, NoWriteBack);
955 }
956
957 /* 64 bit load sign-extended byte scaled or unscaled zero-
958 or sign-extended 32-bit register offset. */
959 static void
960 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
961 {
962 unsigned rm = INSTR (20, 16);
963 unsigned rn = INSTR (9, 5);
964 unsigned rt = INSTR (4, 0);
965 /* rn may reference SP, rm and rt must reference ZR */
966
967 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
968 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
969 extension);
970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
971 /* There is no scaling required for a byte load. */
972 aarch64_set_reg_s64 (cpu, rt, NO_SP,
973 aarch64_get_mem_s8 (cpu, address + displacement));
974 }
975
976 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
977 static void
978 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
979 {
980 unsigned rn = INSTR (9, 5);
981 unsigned rt = INSTR (4, 0);
982 uint32_t val;
983
984 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
985 /* The target register may not be SP but the source may be. */
986 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
987 + SCALE (offset, 16));
988 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
989 }
990
991 /* 32 bit load zero-extended short unscaled signed 9 bit
992 with pre- or post-writeback. */
993 static void
994 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
995 {
996 unsigned rn = INSTR (9, 5);
997 unsigned rt = INSTR (4, 0);
998 uint64_t address;
999
1000 if (rn == rt && wb != NoWriteBack)
1001 HALT_UNALLOC;
1002
1003 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1004
1005 if (wb != Post)
1006 address += offset;
1007
1008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1009 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1010
1011 if (wb == Post)
1012 address += offset;
1013
1014 if (wb != NoWriteBack)
1015 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1016 }
1017
1018 /* 32 bit load zero-extended short scaled or unscaled zero-
1019 or sign-extended 32-bit register offset. */
1020 static void
1021 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1022 {
1023 unsigned rm = INSTR (20, 16);
1024 unsigned rn = INSTR (9, 5);
1025 unsigned rt = INSTR (4, 0);
1026 /* rn may reference SP, rm and rt must reference ZR */
1027
1028 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1029 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1030 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1031
1032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1033 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1034 aarch64_get_mem_u16 (cpu, address + displacement));
1035 }
1036
1037 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1038 static void
1039 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1040 {
1041 unsigned rn = INSTR (9, 5);
1042 unsigned rt = INSTR (4, 0);
1043 int32_t val;
1044
1045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1046 /* The target register may not be SP but the source may be. */
1047 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1048 + SCALE (offset, 16));
1049 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1050 }
1051
1052 /* 32 bit load sign-extended short unscaled signed 9 bit
1053 with pre- or post-writeback. */
1054 static void
1055 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1056 {
1057 unsigned rn = INSTR (9, 5);
1058 unsigned rt = INSTR (4, 0);
1059 uint64_t address;
1060
1061 if (rn == rt && wb != NoWriteBack)
1062 HALT_UNALLOC;
1063
1064 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1065
1066 if (wb != Post)
1067 address += offset;
1068
1069 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1070 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1071 (int32_t) aarch64_get_mem_s16 (cpu, address));
1072
1073 if (wb == Post)
1074 address += offset;
1075
1076 if (wb != NoWriteBack)
1077 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1078 }
1079
1080 /* 32 bit load sign-extended short scaled or unscaled zero-
1081 or sign-extended 32-bit register offset. */
1082 static void
1083 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1084 {
1085 unsigned rm = INSTR (20, 16);
1086 unsigned rn = INSTR (9, 5);
1087 unsigned rt = INSTR (4, 0);
1088 /* rn may reference SP, rm and rt must reference ZR */
1089
1090 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1091 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1092 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1093
1094 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1095 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1096 (int32_t) aarch64_get_mem_s16
1097 (cpu, address + displacement));
1098 }
1099
1100 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1101 static void
1102 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1103 {
1104 unsigned rn = INSTR (9, 5);
1105 unsigned rt = INSTR (4, 0);
1106 int64_t val;
1107
1108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1109 /* The target register may not be SP but the source may be. */
1110 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1111 + SCALE (offset, 16));
1112 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1113 }
1114
1115 /* 64 bit load sign-extended short unscaled signed 9 bit
1116 with pre- or post-writeback. */
1117 static void
1118 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1119 {
1120 unsigned rn = INSTR (9, 5);
1121 unsigned rt = INSTR (4, 0);
1122 uint64_t address;
1123 int64_t val;
1124
1125 if (rn == rt && wb != NoWriteBack)
1126 HALT_UNALLOC;
1127
1128 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1129 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1130
1131 if (wb != Post)
1132 address += offset;
1133
1134 val = aarch64_get_mem_s16 (cpu, address);
1135 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1136
1137 if (wb == Post)
1138 address += offset;
1139
1140 if (wb != NoWriteBack)
1141 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1142 }
1143
1144 /* 64 bit load sign-extended short scaled or unscaled zero-
1145 or sign-extended 32-bit register offset. */
1146 static void
1147 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1148 {
1149 unsigned rm = INSTR (20, 16);
1150 unsigned rn = INSTR (9, 5);
1151 unsigned rt = INSTR (4, 0);
1152
1153 /* rn may reference SP, rm and rt must reference ZR */
1154
1155 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1156 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1157 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1158 int64_t val;
1159
1160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1161 val = aarch64_get_mem_s16 (cpu, address + displacement);
1162 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1163 }
1164
1165 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1166 static void
1167 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1168 {
1169 unsigned rn = INSTR (9, 5);
1170 unsigned rt = INSTR (4, 0);
1171 int64_t val;
1172
1173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1174 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1175 + SCALE (offset, 32));
1176 /* The target register may not be SP but the source may be. */
1177 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1178 }
1179
1180 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1181 with pre- or post-writeback. */
1182 static void
1183 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1184 {
1185 unsigned rn = INSTR (9, 5);
1186 unsigned rt = INSTR (4, 0);
1187 uint64_t address;
1188
1189 if (rn == rt && wb != NoWriteBack)
1190 HALT_UNALLOC;
1191
1192 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1193
1194 if (wb != Post)
1195 address += offset;
1196
1197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1198 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1199
1200 if (wb == Post)
1201 address += offset;
1202
1203 if (wb != NoWriteBack)
1204 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1205 }
1206
1207 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1208 or sign-extended 32-bit register offset. */
1209 static void
1210 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1211 {
1212 unsigned rm = INSTR (20, 16);
1213 unsigned rn = INSTR (9, 5);
1214 unsigned rt = INSTR (4, 0);
1215 /* rn may reference SP, rm and rt must reference ZR */
1216
1217 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1218 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1219 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1220
1221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1222 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1223 aarch64_get_mem_s32 (cpu, address + displacement));
1224 }
1225
1226 /* N.B. with stores the value in source is written to the
1227 address identified by source2 modified by source3/offset. */
1228
1229 /* 32 bit store scaled unsigned 12 bit. */
1230 static void
1231 str32_abs (sim_cpu *cpu, uint32_t offset)
1232 {
1233 unsigned rn = INSTR (9, 5);
1234 unsigned rt = INSTR (4, 0);
1235
1236 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1237 /* The target register may not be SP but the source may be. */
1238 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1239 + SCALE (offset, 32)),
1240 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1241 }
1242
1243 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1244 static void
1245 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1246 {
1247 unsigned rn = INSTR (9, 5);
1248 unsigned rt = INSTR (4, 0);
1249 uint64_t address;
1250
1251 if (rn == rt && wb != NoWriteBack)
1252 HALT_UNALLOC;
1253
1254 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1255 if (wb != Post)
1256 address += offset;
1257
1258 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1259 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1260
1261 if (wb == Post)
1262 address += offset;
1263
1264 if (wb != NoWriteBack)
1265 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1266 }
1267
1268 /* 32 bit store scaled or unscaled zero- or
1269 sign-extended 32-bit register offset. */
1270 static void
1271 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1272 {
1273 unsigned rm = INSTR (20, 16);
1274 unsigned rn = INSTR (9, 5);
1275 unsigned rt = INSTR (4, 0);
1276
1277 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1278 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1279 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1280
1281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1282 aarch64_set_mem_u32 (cpu, address + displacement,
1283 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1284 }
1285
1286 /* 64 bit store scaled unsigned 12 bit. */
1287 static void
1288 str_abs (sim_cpu *cpu, uint32_t offset)
1289 {
1290 unsigned rn = INSTR (9, 5);
1291 unsigned rt = INSTR (4, 0);
1292
1293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1294 aarch64_set_mem_u64 (cpu,
1295 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1296 + SCALE (offset, 64),
1297 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1298 }
1299
1300 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1301 static void
1302 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1303 {
1304 unsigned rn = INSTR (9, 5);
1305 unsigned rt = INSTR (4, 0);
1306 uint64_t address;
1307
1308 if (rn == rt && wb != NoWriteBack)
1309 HALT_UNALLOC;
1310
1311 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1312
1313 if (wb != Post)
1314 address += offset;
1315
1316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1317 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1318
1319 if (wb == Post)
1320 address += offset;
1321
1322 if (wb != NoWriteBack)
1323 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1324 }
1325
1326 /* 64 bit store scaled or unscaled zero-
1327 or sign-extended 32-bit register offset. */
1328 static void
1329 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1330 {
1331 unsigned rm = INSTR (20, 16);
1332 unsigned rn = INSTR (9, 5);
1333 unsigned rt = INSTR (4, 0);
1334 /* rn may reference SP, rm and rt must reference ZR */
1335
1336 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1337 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1338 extension);
1339 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1340
1341 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1342 aarch64_set_mem_u64 (cpu, address + displacement,
1343 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1344 }
1345
1346 /* 32 bit store byte scaled unsigned 12 bit. */
1347 static void
1348 strb_abs (sim_cpu *cpu, uint32_t offset)
1349 {
1350 unsigned rn = INSTR (9, 5);
1351 unsigned rt = INSTR (4, 0);
1352
1353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1354 /* The target register may not be SP but the source may be.
1355 There is no scaling required for a byte load. */
1356 aarch64_set_mem_u8 (cpu,
1357 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1358 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1359 }
1360
1361 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1362 static void
1363 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1364 {
1365 unsigned rn = INSTR (9, 5);
1366 unsigned rt = INSTR (4, 0);
1367 uint64_t address;
1368
1369 if (rn == rt && wb != NoWriteBack)
1370 HALT_UNALLOC;
1371
1372 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1373
1374 if (wb != Post)
1375 address += offset;
1376
1377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1378 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1379
1380 if (wb == Post)
1381 address += offset;
1382
1383 if (wb != NoWriteBack)
1384 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1385 }
1386
1387 /* 32 bit store byte scaled or unscaled zero-
1388 or sign-extended 32-bit register offset. */
1389 static void
1390 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1391 {
1392 unsigned rm = INSTR (20, 16);
1393 unsigned rn = INSTR (9, 5);
1394 unsigned rt = INSTR (4, 0);
1395 /* rn may reference SP, rm and rt must reference ZR */
1396
1397 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1398 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1399 extension);
1400
1401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1402 /* There is no scaling required for a byte load. */
1403 aarch64_set_mem_u8 (cpu, address + displacement,
1404 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1405 }
1406
1407 /* 32 bit store short scaled unsigned 12 bit. */
1408 static void
1409 strh_abs (sim_cpu *cpu, uint32_t offset)
1410 {
1411 unsigned rn = INSTR (9, 5);
1412 unsigned rt = INSTR (4, 0);
1413
1414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1415 /* The target register may not be SP but the source may be. */
1416 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1417 + SCALE (offset, 16),
1418 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1419 }
1420
1421 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1422 static void
1423 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1424 {
1425 unsigned rn = INSTR (9, 5);
1426 unsigned rt = INSTR (4, 0);
1427 uint64_t address;
1428
1429 if (rn == rt && wb != NoWriteBack)
1430 HALT_UNALLOC;
1431
1432 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1433
1434 if (wb != Post)
1435 address += offset;
1436
1437 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1438 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1439
1440 if (wb == Post)
1441 address += offset;
1442
1443 if (wb != NoWriteBack)
1444 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1445 }
1446
1447 /* 32 bit store short scaled or unscaled zero-
1448 or sign-extended 32-bit register offset. */
1449 static void
1450 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1451 {
1452 unsigned rm = INSTR (20, 16);
1453 unsigned rn = INSTR (9, 5);
1454 unsigned rt = INSTR (4, 0);
1455 /* rn may reference SP, rm and rt must reference ZR */
1456
1457 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1458 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1459 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1460
1461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1462 aarch64_set_mem_u16 (cpu, address + displacement,
1463 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1464 }
1465
1466 /* Prefetch unsigned 12 bit. */
1467 static void
1468 prfm_abs (sim_cpu *cpu, uint32_t offset)
1469 {
1470 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1471 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1472 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1473 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1474 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1475 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1476 ow ==> UNALLOC
1477 PrfOp prfop = prfop (instr, 4, 0);
1478 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1479 + SCALE (offset, 64). */
1480
1481 /* TODO : implement prefetch of address. */
1482 }
1483
1484 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1485 static void
1486 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1487 {
1488 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1489 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1490 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1491 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1492 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1493 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1494 ow ==> UNALLOC
1495 rn may reference SP, rm may only reference ZR
1496 PrfOp prfop = prfop (instr, 4, 0);
1497 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1498 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1499 extension);
1500 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1501 uint64_t address = base + displacement. */
1502
1503 /* TODO : implement prefetch of address */
1504 }
1505
1506 /* 64 bit pc-relative prefetch. */
1507 static void
1508 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1509 {
1510 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1511 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1512 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1513 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1514 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1515 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1516 ow ==> UNALLOC
1517 PrfOp prfop = prfop (instr, 4, 0);
1518 uint64_t address = aarch64_get_PC (cpu) + offset. */
1519
1520 /* TODO : implement this */
1521 }
1522
1523 /* Load-store exclusive. */
1524
1525 static void
1526 ldxr (sim_cpu *cpu)
1527 {
1528 unsigned rn = INSTR (9, 5);
1529 unsigned rt = INSTR (4, 0);
1530 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1531 int size = INSTR (31, 30);
1532 /* int ordered = INSTR (15, 15); */
1533 /* int exclusive = ! INSTR (23, 23); */
1534
1535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1536 switch (size)
1537 {
1538 case 0:
1539 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1540 break;
1541 case 1:
1542 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1543 break;
1544 case 2:
1545 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1546 break;
1547 case 3:
1548 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1549 break;
1550 }
1551 }
1552
1553 static void
1554 stxr (sim_cpu *cpu)
1555 {
1556 unsigned rn = INSTR (9, 5);
1557 unsigned rt = INSTR (4, 0);
1558 unsigned rs = INSTR (20, 16);
1559 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1560 int size = INSTR (31, 30);
1561 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1562
1563 switch (size)
1564 {
1565 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1566 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1567 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1568 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1569 }
1570
1571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1572 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always report success; exclusivity is not modelled. */
1573 }
1574
1575 static void
1576 dexLoadLiteral (sim_cpu *cpu)
1577 {
1578 /* instr[29,27] == 011
1579 instr[25,24] == 00
1580 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1581 010 ==> LDRX, 011 ==> FLDRD
1582 100 ==> LDRSW, 101 ==> FLDRQ
1583 110 ==> PRFM, 111 ==> UNALLOC
1584 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1585 instr[23, 5] == simm19 */
1586
1587 /* unsigned rt = INSTR (4, 0); */
1588 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1589 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1590
1591 switch (dispatch)
1592 {
1593 case 0: ldr32_pcrel (cpu, imm); break;
1594 case 1: fldrs_pcrel (cpu, imm); break;
1595 case 2: ldr_pcrel (cpu, imm); break;
1596 case 3: fldrd_pcrel (cpu, imm); break;
1597 case 4: ldrsw_pcrel (cpu, imm); break;
1598 case 5: fldrq_pcrel (cpu, imm); break;
1599 case 6: prfm_pcrel (cpu, imm); break;
1600 case 7:
1601 default:
1602 HALT_UNALLOC;
1603 }
1604 }
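/* For example, a 64 bit LDR (literal) has opc = 01 and V = 0, giving
   dispatch = (1 << 1) | 0 = 2, so it is routed to ldr_pcrel.  */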
1605
1606 /* Immediate arithmetic
1607 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1608 value left shifted by 12 bits (done at decode).
1609
1610 N.B. the register args (dest, source) can normally be Xn or SP.
1611 The exception occurs for flag setting instructions which may
1612 only use Xn for the output (dest). */
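/* For example, ADD W0, W1, #1, LSL #12 arrives here with aimm
   already decoded to 0x1000.  */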
1613
1614 /* 32 bit add immediate. */
1615 static void
1616 add32 (sim_cpu *cpu, uint32_t aimm)
1617 {
1618 unsigned rn = INSTR (9, 5);
1619 unsigned rd = INSTR (4, 0);
1620
1621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1623 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1624 }
1625
1626 /* 64 bit add immediate. */
1627 static void
1628 add64 (sim_cpu *cpu, uint32_t aimm)
1629 {
1630 unsigned rn = INSTR (9, 5);
1631 unsigned rd = INSTR (4, 0);
1632
1633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1635 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1636 }
1637
1638 static void
1639 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1640 {
1641 int32_t result = value1 + value2;
1642 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1643 uint64_t uresult = (uint64_t)(uint32_t) value1
1644 + (uint64_t)(uint32_t) value2;
1645 uint32_t flags = 0;
1646
1647 if (result == 0)
1648 flags |= Z;
1649
1650 if (result & (1 << 31))
1651 flags |= N;
1652
1653 if (uresult != (uint32_t) result)
1654 flags |= C;
1655
1656 if (sresult != result)
1657 flags |= V;
1658
1659 aarch64_set_CPSR (cpu, flags);
1660 }
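/* Worked example: 0x7fffffff + 1 gives result 0x80000000, so N and V
   are set (positive operands, negative result) while C and Z stay
   clear.  */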
1661
1662 #define NEG(a) (((a) & signbit) == signbit)
1663 #define POS(a) (((a) & signbit) == 0)
1664
1665 static void
1666 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1667 {
1668 uint64_t result = value1 + value2;
1669 uint32_t flags = 0;
1670 uint64_t signbit = 1ULL << 63;
1671
1672 if (result == 0)
1673 flags |= Z;
1674
1675 if (NEG (result))
1676 flags |= N;
1677
1678 if ( (NEG (value1) && NEG (value2))
1679 || (NEG (value1) && POS (result))
1680 || (NEG (value2) && POS (result)))
1681 flags |= C;
1682
1683 if ( (NEG (value1) && NEG (value2) && POS (result))
1684 || (POS (value1) && POS (value2) && NEG (result)))
1685 flags |= V;
1686
1687 aarch64_set_CPSR (cpu, flags);
1688 }
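/* Worked example: 0x8000000000000000 + 0x8000000000000000 gives a
   result of zero, so Z, C and V are all set while N is clear.  */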
1689
1690 static void
1691 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1692 {
1693 uint32_t result = value1 - value2;
1694 uint32_t flags = 0;
1695 uint32_t signbit = 1U << 31;
1696
1697 if (result == 0)
1698 flags |= Z;
1699
1700 if (NEG (result))
1701 flags |= N;
1702
1703 if ( (NEG (value1) && POS (value2))
1704 || (NEG (value1) && POS (result))
1705 || (POS (value2) && POS (result)))
1706 flags |= C;
1707
1708 if ( (NEG (value1) && POS (value2) && POS (result))
1709 || (POS (value1) && NEG (value2) && NEG (result)))
1710 flags |= V;
1711
1712 aarch64_set_CPSR (cpu, flags);
1713 }
1714
1715 static void
1716 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1717 {
1718 uint64_t result = value1 - value2;
1719 uint32_t flags = 0;
1720 uint64_t signbit = 1ULL << 63;
1721
1722 if (result == 0)
1723 flags |= Z;
1724
1725 if (NEG (result))
1726 flags |= N;
1727
1728 if ( (NEG (value1) && POS (value2))
1729 || (NEG (value1) && POS (result))
1730 || (POS (value2) && POS (result)))
1731 flags |= C;
1732
1733 if ( (NEG (value1) && POS (value2) && POS (result))
1734 || (POS (value1) && NEG (value2) && NEG (result)))
1735 flags |= V;
1736
1737 aarch64_set_CPSR (cpu, flags);
1738 }
1739
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1 << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 {
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1880 }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 {
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1904 }
1905 }
1906 }
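
/* Worked example: shifted64 (0x123456789abcdef0, ROR, 4) computes
   top = 0x0123456789abcdef and bottom = 0x0000000000000000 (the low
   nibble of the input is zero here), yielding 0x0123456789abcdef.  */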
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
   N.B. register args may not be SP.  */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
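
/* N.B. these flag-setting forms also implement the CMP/CMN aliases
   (Rd == 31): with NO_SP the write to register 31 should be
   discarded by aarch64_set_reg_u64 (zero-register semantics,
   assuming the register-file helper honours NO_SP that way), so
   only the flag update survives.  */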
2041
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
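
/* For example, if the low byte of register rm is 0x80 then
   extreg64 (cpu, rm, SXTB) sign extends it to 0xffffffffffffff80,
   whereas extreg64 (cpu, rm, UXTB) zero extends it to 0x80.  */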
2080
2081 /* Arithmetic extending register
   These allow an optional sign or zero extension of some portion of
   the second source register followed by an optional left shift of
   between 0 and 4 bits.

   N.B. the output (dest) and first input arg (source) may normally
   be Xn or SP.  However, for flag setting operations dest can only
   be Xn.  Second input registers are always Xn.  */
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
     instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
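
/* Decode example: the encoding 0x91400420 (ADD X0, X1, #1, LSL #12)
   has shift = 1 and uimm12 = 1, so imm becomes 0x1000 before add64
   is dispatched with size:op:set = 100.  */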
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
     instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
                             010 ==> UXTW|LSL, 011 ==> UXTX,
                             100 ==> SXTB, 101 ==> SXTH,
                             110 ==> SXTW, 111 ==> SXTX
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
/* N.B. register args may not be SP.  */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
  uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
  uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
     instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
  /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
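
/* Example: after a compare that leaves N=1, Z=0, C=0, V=0 (say a
   32-bit CMP of 1 against 2), LT holds because N != V, while GE, GT,
   HI and EQ all report 0.  */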
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
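
/* Example: CCMP X1, #2, #8, EQ first evaluates EQ.  If the condition
   fails, the CPSR is simply loaded with the literal nzcv field, here
   8 (N set, matching the direct CPSR assignment above); if it holds,
   the flags are set as for X1 - 2, the rm field doubling as the
   5-bit comparison constant.  */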
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_MOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,18] = element size and index
2629 instr[17,10] = 00 0011 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635
2636 NYI_assert (29, 21, 0x070);
2637 NYI_assert (17, 10, 0x0F);
2638
2639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2640 switch (INSTR (20, 18))
2641 {
2642 case 0x2:
2643 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2644 break;
2645
2646 case 0x6:
2647 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2648 break;
2649
2650 case 0x1:
2651 case 0x3:
2652 case 0x5:
2653 case 0x7:
2654 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2655 (cpu, vs, INSTR (20, 19)));
2656 break;
2657
2658 default:
2659 HALT_NYI;
2660 }
2661 }
2662
2663 static void
2664 do_vec_INS (sim_cpu *cpu)
2665 {
2666 /* instr[31,21] = 01001110000
2667 instr[20,16] = element size and index
2668 instr[15,10] = 000111
2669 instr[9,5] = W source
2670 instr[4,0] = V dest */
2671
2672 int index;
2673 unsigned rs = INSTR (9, 5);
2674 unsigned vd = INSTR (4, 0);
2675
2676 NYI_assert (31, 21, 0x270);
2677 NYI_assert (15, 10, 0x07);
2678
2679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2680 if (INSTR (16, 16))
2681 {
2682 index = INSTR (20, 17);
2683 aarch64_set_vec_u8 (cpu, vd, index,
2684 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2685 }
2686 else if (INSTR (17, 17))
2687 {
2688 index = INSTR (20, 18);
2689 aarch64_set_vec_u16 (cpu, vd, index,
2690 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2691 }
2692 else if (INSTR (18, 18))
2693 {
2694 index = INSTR (20, 19);
2695 aarch64_set_vec_u32 (cpu, vd, index,
2696 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2697 }
2698 else if (INSTR (19, 19))
2699 {
2700 index = INSTR (20, 20);
2701 aarch64_set_vec_u64 (cpu, vd, index,
2702 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2703 }
2704 else
2705 HALT_NYI;
2706 }
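
/* Worked example: for INS Vd.S[2], Wn the imm5 field (instr[20,16])
   is 10100: bits 16 and 17 are clear and bit 18 is set, so the
   32-bit arm is taken and the index INSTR (20, 19) = 0b10 selects
   element 2.  */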
2707
2708 static void
2709 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2710 {
2711 /* instr[31] = 0
2712 instr[30] = half(0)/full(1)
2713 instr[29,21] = 00 1110 000
2714 instr[20,16] = element size and index
2715 instr[15,10] = 0000 01
2716 instr[9,5] = V source
2717 instr[4,0] = V dest. */
2718
2719 unsigned full = INSTR (30, 30);
2720 unsigned vs = INSTR (9, 5);
2721 unsigned vd = INSTR (4, 0);
2722 int i, index;
2723
2724 NYI_assert (29, 21, 0x070);
2725 NYI_assert (15, 10, 0x01);
2726
2727 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2728 if (INSTR (16, 16))
2729 {
2730 index = INSTR (20, 17);
2731
2732 for (i = 0; i < (full ? 16 : 8); i++)
2733 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2734 }
2735 else if (INSTR (17, 17))
2736 {
2737 index = INSTR (20, 18);
2738
2739 for (i = 0; i < (full ? 8 : 4); i++)
2740 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2741 }
2742 else if (INSTR (18, 18))
2743 {
2744 index = INSTR (20, 19);
2745
2746 for (i = 0; i < (full ? 4 : 2); i++)
2747 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2748 }
2749 else
2750 {
2751 if (INSTR (19, 19) == 0)
2752 HALT_UNALLOC;
2753
2754 if (! full)
2755 HALT_UNALLOC;
2756
2757 index = INSTR (20, 20);
2758
2759 for (i = 0; i < 2; i++)
2760 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2761 }
2762 }
2763
2764 static void
2765 do_vec_TBL (sim_cpu *cpu)
2766 {
2767 /* instr[31] = 0
2768 instr[30] = half(0)/full(1)
2769 instr[29,21] = 00 1110 000
2770 instr[20,16] = Vm
2771 instr[15] = 0
2772 instr[14,13] = vec length
2773 instr[12,10] = 000
2774 instr[9,5] = V start
2775 instr[4,0] = V dest */
2776
2777 int full = INSTR (30, 30);
2778 int len = INSTR (14, 13) + 1;
2779 unsigned vm = INSTR (20, 16);
2780 unsigned vn = INSTR (9, 5);
2781 unsigned vd = INSTR (4, 0);
2782 unsigned i;
2783
2784 NYI_assert (29, 21, 0x070);
2785 NYI_assert (12, 10, 0);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 for (i = 0; i < (full ? 16 : 8); i++)
2789 {
2790 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2791 uint8_t val;
2792
2793 if (selector < 16)
2794 val = aarch64_get_vec_u8 (cpu, vn, selector);
2795 else if (selector < 32)
2796 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2797 else if (selector < 48)
2798 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2799 else if (selector < 64)
2800 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2801 else
2802 val = 0;
2803
2804 aarch64_set_vec_u8 (cpu, vd, i, val);
2805 }
2806 }
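
/* Example: with len == 2 (a two-register table) a selector byte of
   0x13 (19) reads byte 3 of register vn + 1, while any selector of
   32 or more is out of range for that table length and yields 0.  */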
2807
2808 static void
2809 do_vec_TRN (sim_cpu *cpu)
2810 {
2811 /* instr[31] = 0
2812 instr[30] = half(0)/full(1)
2813 instr[29,24] = 00 1110
2814 instr[23,22] = size
2815 instr[21] = 0
2816 instr[20,16] = Vm
2817 instr[15] = 0
2818 instr[14] = TRN1 (0) / TRN2 (1)
2819 instr[13,10] = 1010
2820 instr[9,5] = V source
2821 instr[4,0] = V dest. */
2822
2823 int full = INSTR (30, 30);
2824 int second = INSTR (14, 14);
2825 unsigned vm = INSTR (20, 16);
2826 unsigned vn = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 unsigned i;
2829
2830 NYI_assert (29, 24, 0x0E);
2831 NYI_assert (13, 10, 0xA);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 switch (INSTR (23, 22))
2835 {
      /* TRN1 (second == 0) pairs the even-numbered source elements,
	 TRN2 (second == 1) the odd-numbered ones:
	 Vd[2i] = Vn[2i + second], Vd[2i + 1] = Vm[2i + second].  */
    case 0:
      for (i = 0; i < (full ? 8 : 4); i++)
	{
	  aarch64_set_vec_u8
	    (cpu, vd, i * 2,
	     aarch64_get_vec_u8 (cpu, vn, i * 2 + second));
	  aarch64_set_vec_u8
	    (cpu, vd, i * 2 + 1,
	     aarch64_get_vec_u8 (cpu, vm, i * 2 + second));
	}
      break;

    case 1:
      for (i = 0; i < (full ? 4 : 2); i++)
	{
	  aarch64_set_vec_u16
	    (cpu, vd, i * 2,
	     aarch64_get_vec_u16 (cpu, vn, i * 2 + second));
	  aarch64_set_vec_u16
	    (cpu, vd, i * 2 + 1,
	     aarch64_get_vec_u16 (cpu, vm, i * 2 + second));
	}
      break;

    case 2:
      aarch64_set_vec_u32
	(cpu, vd, 0, aarch64_get_vec_u32 (cpu, vn, second));
      aarch64_set_vec_u32
	(cpu, vd, 1, aarch64_get_vec_u32 (cpu, vm, second));
      aarch64_set_vec_u32
	(cpu, vd, 2, aarch64_get_vec_u32 (cpu, vn, 2 + second));
      aarch64_set_vec_u32
	(cpu, vd, 3, aarch64_get_vec_u32 (cpu, vm, 2 + second));
      break;

    case 3:
      if (! full)
	HALT_UNALLOC;

      aarch64_set_vec_u64 (cpu, vd, 0,
			   aarch64_get_vec_u64 (cpu, vn, second));
      aarch64_set_vec_u64 (cpu, vd, 1,
			   aarch64_get_vec_u64 (cpu, vm, second));
      break;
2880 }
2881 }
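
/* So, for example, TRN1 Vd.4H, Vn.4H, Vm.4H produces
   { Vn.h[0], Vm.h[0], Vn.h[2], Vm.h[2] }, and TRN2 the odd-numbered
   counterparts.  */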
2882
2883 static void
2884 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2885 {
2886 /* instr[31] = 0
2887 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2888 [must be 1 for 64-bit xfer]
2889 instr[29,20] = 00 1110 0000
2890 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
                    0100=> 32-bits, 1000=> 64-bits
2892 instr[15,10] = 0000 11
2893 instr[9,5] = W source
2894 instr[4,0] = V dest. */
2895
2896 unsigned i;
2897 unsigned Vd = INSTR (4, 0);
2898 unsigned Rs = INSTR (9, 5);
2899 int both = INSTR (30, 30);
2900
2901 NYI_assert (29, 20, 0x0E0);
2902 NYI_assert (15, 10, 0x03);
2903
2904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2905 switch (INSTR (19, 16))
2906 {
2907 case 1:
2908 for (i = 0; i < (both ? 16 : 8); i++)
2909 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2910 break;
2911
2912 case 2:
2913 for (i = 0; i < (both ? 8 : 4); i++)
2914 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2915 break;
2916
2917 case 4:
2918 for (i = 0; i < (both ? 4 : 2); i++)
2919 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2920 break;
2921
2922 case 8:
2923 if (!both)
2924 HALT_NYI;
2925 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2926 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2927 break;
2928
2929 default:
2930 HALT_NYI;
2931 }
2932 }
2933
2934 static void
2935 do_vec_UZP (sim_cpu *cpu)
2936 {
2937 /* instr[31] = 0
2938 instr[30] = half(0)/full(1)
2939 instr[29,24] = 00 1110
2940 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2941 instr[21] = 0
2942 instr[20,16] = Vm
2943 instr[15] = 0
2944 instr[14] = lower (0) / upper (1)
2945 instr[13,10] = 0110
2946 instr[9,5] = Vn
2947 instr[4,0] = Vd. */
2948
2949 int full = INSTR (30, 30);
2950 int upper = INSTR (14, 14);
2951
2952 unsigned vm = INSTR (20, 16);
2953 unsigned vn = INSTR (9, 5);
2954 unsigned vd = INSTR (4, 0);
2955
2956 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2957 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2958 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2959 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2960
2961 uint64_t val1;
2962 uint64_t val2;
2963
2964 uint64_t input2 = full ? val_n2 : val_m1;
2965
2966 NYI_assert (29, 24, 0x0E);
2967 NYI_assert (21, 21, 0);
2968 NYI_assert (15, 15, 0);
2969 NYI_assert (13, 10, 6);
2970
2971 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2972 switch (INSTR (23, 22))
2973 {
2974 case 0:
2975 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
2976 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2977 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2978 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2979
2980 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2981 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2982 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2983 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2984
2985 if (full)
2986 {
2987 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
2988 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
2989 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
2990 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
2991
2992 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
2993 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
2994 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
2995 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
2996 }
2997 break;
2998
2999 case 1:
3000 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3001 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3002
      val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3004 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3005
3006 if (full)
3007 {
3008 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3009 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3010
3011 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3012 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3013 }
3014 break;
3015
3016 case 2:
3017 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3018 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3019
3020 if (full)
3021 {
3022 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3023 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3024 }
3025 break;
3026
3027 case 3:
3028 if (! full)
3029 HALT_UNALLOC;
3030
3031 val1 = upper ? val_n2 : val_n1;
3032 val2 = upper ? val_m2 : val_m1;
3033 break;
3034 }
3035
3036 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3037 if (full)
3038 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3039 }
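
/* Example: UZP1 Vd.8B, Vn.8B, Vm.8B concatenates the even-numbered
   bytes of each source, giving { n0, n2, n4, n6, m0, m2, m4, m6 };
   UZP2 (upper == 1) takes the odd-numbered bytes instead.  */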
3040
3041 static void
3042 do_vec_ZIP (sim_cpu *cpu)
3043 {
3044 /* instr[31] = 0
3045 instr[30] = half(0)/full(1)
3046 instr[29,24] = 00 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
3048 instr[21] = 0
3049 instr[20,16] = Vm
3050 instr[15] = 0
3051 instr[14] = lower (0) / upper (1)
3052 instr[13,10] = 1110
3053 instr[9,5] = Vn
3054 instr[4,0] = Vd. */
3055
3056 int full = INSTR (30, 30);
3057 int upper = INSTR (14, 14);
3058
3059 unsigned vm = INSTR (20, 16);
3060 unsigned vn = INSTR (9, 5);
3061 unsigned vd = INSTR (4, 0);
3062
3063 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3064 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3065 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3066 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3067
3068 uint64_t val1 = 0;
3069 uint64_t val2 = 0;
3070
  /* ZIP1 (upper == 0) interleaves the lower 64-bit halves of Vn and
     Vm; ZIP2 (upper == 1) interleaves the upper halves.  */
  uint64_t input1 = upper ? val_n2 : val_n1;
  uint64_t input2 = upper ? val_m2 : val_m1;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 0xE);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 =
3084 ((input1 << 0) & (0xFF << 0))
3085 | ((input2 << 8) & (0xFF << 8))
3086 | ((input1 << 8) & (0xFF << 16))
3087 | ((input2 << 16) & (0xFF << 24))
3088 | ((input1 << 16) & (0xFFULL << 32))
3089 | ((input2 << 24) & (0xFFULL << 40))
3090 | ((input1 << 24) & (0xFFULL << 48))
3091 | ((input2 << 32) & (0xFFULL << 56));
3092
3093 val2 =
3094 ((input1 >> 32) & (0xFF << 0))
3095 | ((input2 >> 24) & (0xFF << 8))
3096 | ((input1 >> 24) & (0xFF << 16))
3097 | ((input2 >> 16) & (0xFF << 24))
3098 | ((input1 >> 16) & (0xFFULL << 32))
3099 | ((input2 >> 8) & (0xFFULL << 40))
3100 | ((input1 >> 8) & (0xFFULL << 48))
3101 | ((input2 >> 0) & (0xFFULL << 56));
3102 break;
3103
3104 case 1:
3105 val1 =
3106 ((input1 << 0) & (0xFFFF << 0))
3107 | ((input2 << 16) & (0xFFFF << 16))
3108 | ((input1 << 16) & (0xFFFFULL << 32))
3109 | ((input2 << 32) & (0xFFFFULL << 48));
3110
3111 val2 =
3112 ((input1 >> 32) & (0xFFFF << 0))
3113 | ((input2 >> 16) & (0xFFFF << 16))
3114 | ((input1 >> 16) & (0xFFFFULL << 32))
3115 | ((input2 >> 0) & (0xFFFFULL << 48));
3116 break;
3117
3118 case 2:
3119 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
      val2 = ((input1 >> 32) & 0xFFFFFFFFULL)
	| (input2 & 0xFFFFFFFF00000000ULL);
3121 break;
3122
3123 case 3:
3124 val1 = input1;
3125 val2 = input2;
3126 break;
3127 }
3128
3129 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3130 if (full)
3131 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3132 }
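
/* Example: ZIP1 Vd.16B, Vn.16B, Vm.16B interleaves the lower halves,
   giving { n0, m0, n1, m1, ..., n7, m7 }; ZIP2 does the same with
   bytes 8-15 of each source.  */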
3133
3134 /* Floating point immediates are encoded in 8 bits.
3135 fpimm[7] = sign bit.
3136 fpimm[6:4] = signed exponent.
3137 fpimm[3:0] = fraction (assuming leading 1).
3138 i.e. F = s * 1.f * 2^(e - b). */
3139
3140 static float
3141 fp_immediate_for_encoding_32 (uint32_t imm8)
3142 {
3143 float u;
3144 uint32_t s, e, f, i;
3145
3146 s = (imm8 >> 7) & 0x1;
3147 e = (imm8 >> 4) & 0x7;
3148 f = imm8 & 0xf;
3149
  /* The fp value is (-1)^s * n/16 * 2^e where n is 16+f and e is
     the signed exponent.  */
3151 u = (16.0 + f) / 16.0;
3152
3153 /* N.B. exponent is signed. */
3154 if (e < 4)
3155 {
3156 int epos = e;
3157
3158 for (i = 0; i <= epos; i++)
3159 u *= 2.0;
3160 }
3161 else
3162 {
3163 int eneg = 7 - e;
3164
3165 for (i = 0; i < eneg; i++)
3166 u /= 2.0;
3167 }
3168
3169 if (s)
3170 u = - u;
3171
3172 return u;
3173 }
3174
3175 static double
3176 fp_immediate_for_encoding_64 (uint32_t imm8)
3177 {
3178 double u;
3179 uint32_t s, e, f, i;
3180
3181 s = (imm8 >> 7) & 0x1;
3182 e = (imm8 >> 4) & 0x7;
3183 f = imm8 & 0xf;
3184
  /* The fp value is (-1)^s * n/16 * 2^e where n is 16+f and e is
     the signed exponent.  */
3186 u = (16.0 + f) / 16.0;
3187
3188 /* N.B. exponent is signed. */
3189 if (e < 4)
3190 {
3191 int epos = e;
3192
3193 for (i = 0; i <= epos; i++)
3194 u *= 2.0;
3195 }
3196 else
3197 {
3198 int eneg = 7 - e;
3199
3200 for (i = 0; i < eneg; i++)
3201 u /= 2.0;
3202 }
3203
3204 if (s)
3205 u = - u;
3206
3207 return u;
3208 }
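
/* Worked example: imm8 = 0x70 has s = 0, e = 7, f = 0, so
   u = 16/16 = 1.0 and eneg = 7 - 7 = 0, giving 1.0 -- the encoding
   used by FMOV #1.0.  Likewise imm8 = 0x10 (s = 0, e = 1, f = 0)
   doubles u twice, giving 4.0.  */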
3209
3210 static void
3211 do_vec_MOV_immediate (sim_cpu *cpu)
3212 {
3213 /* instr[31] = 0
3214 instr[30] = full/half selector
3215 instr[29,19] = 00111100000
3216 instr[18,16] = high 3 bits of uimm8
3217 instr[15,12] = size & shift:
3218 0000 => 32-bit
3219 0010 => 32-bit + LSL#8
3220 0100 => 32-bit + LSL#16
3221 0110 => 32-bit + LSL#24
3222 1010 => 16-bit + LSL#8
3223 1000 => 16-bit
3224 1101 => 32-bit + MSL#16
3225 1100 => 32-bit + MSL#8
3226 1110 => 8-bit
3227 1111 => double
3228 instr[11,10] = 01
3229 instr[9,5] = low 5-bits of uimm8
3230 instr[4,0] = Vd. */
3231
3232 int full = INSTR (30, 30);
3233 unsigned vd = INSTR (4, 0);
3234 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3235 unsigned i;
3236
3237 NYI_assert (29, 19, 0x1E0);
3238 NYI_assert (11, 10, 1);
3239
3240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3241 switch (INSTR (15, 12))
3242 {
3243 case 0x0: /* 32-bit, no shift. */
3244 case 0x2: /* 32-bit, shift by 8. */
3245 case 0x4: /* 32-bit, shift by 16. */
3246 case 0x6: /* 32-bit, shift by 24. */
3247 val <<= (8 * INSTR (14, 13));
3248 for (i = 0; i < (full ? 4 : 2); i++)
3249 aarch64_set_vec_u32 (cpu, vd, i, val);
3250 break;
3251
3252 case 0xa: /* 16-bit, shift by 8. */
3253 val <<= 8;
3254 /* Fall through. */
3255 case 0x8: /* 16-bit, no shift. */
3256 for (i = 0; i < (full ? 8 : 4); i++)
3257 aarch64_set_vec_u16 (cpu, vd, i, val);
3258 break;
3259
3260 case 0xd: /* 32-bit, mask shift by 16. */
3261 val <<= 8;
3262 val |= 0xFF;
3263 /* Fall through. */
3264 case 0xc: /* 32-bit, mask shift by 8. */
3265 val <<= 8;
3266 val |= 0xFF;
3267 for (i = 0; i < (full ? 4 : 2); i++)
3268 aarch64_set_vec_u32 (cpu, vd, i, val);
3269 break;
3270
3271 case 0xe: /* 8-bit, no shift. */
3272 for (i = 0; i < (full ? 16 : 8); i++)
3273 aarch64_set_vec_u8 (cpu, vd, i, val);
3274 break;
3275
3276 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3277 {
3278 float u = fp_immediate_for_encoding_32 (val);
3279 for (i = 0; i < (full ? 4 : 2); i++)
3280 aarch64_set_vec_float (cpu, vd, i, u);
3281 break;
3282 }
3283
3284 default:
3285 HALT_NYI;
3286 }
3287 }
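
/* Example of the MSL forms: with uimm8 = 0x12, case 0xd shifts in
   one byte of ones and falls into case 0xc, which shifts in another,
   replicating 0x0012ffff across the 32-bit lanes; case 0xc alone
   would give 0x000012ff.  */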
3288
3289 static void
3290 do_vec_MVNI (sim_cpu *cpu)
3291 {
3292 /* instr[31] = 0
3293 instr[30] = full/half selector
3294 instr[29,19] = 10111100000
3295 instr[18,16] = high 3 bits of uimm8
3296 instr[15,12] = selector
3297 instr[11,10] = 01
3298 instr[9,5] = low 5-bits of uimm8
3299 instr[4,0] = Vd. */
3300
3301 int full = INSTR (30, 30);
3302 unsigned vd = INSTR (4, 0);
3303 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3304 unsigned i;
3305
3306 NYI_assert (29, 19, 0x5E0);
3307 NYI_assert (11, 10, 1);
3308
3309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3310 switch (INSTR (15, 12))
3311 {
3312 case 0x0: /* 32-bit, no shift. */
3313 case 0x2: /* 32-bit, shift by 8. */
3314 case 0x4: /* 32-bit, shift by 16. */
3315 case 0x6: /* 32-bit, shift by 24. */
3316 val <<= (8 * INSTR (14, 13));
3317 val = ~ val;
3318 for (i = 0; i < (full ? 4 : 2); i++)
3319 aarch64_set_vec_u32 (cpu, vd, i, val);
3320 return;
3321
    case 0xa: /* 16-bit, 8 bit shift. */
      val <<= 8;
      /* Fall through.  */
    case 0x8: /* 16-bit, no shift. */
3325 val = ~ val;
3326 for (i = 0; i < (full ? 8 : 4); i++)
3327 aarch64_set_vec_u16 (cpu, vd, i, val);
3328 return;
3329
    case 0xd: /* 32-bit, mask shift by 16. */
      val <<= 8;
      val |= 0xFF;
      /* Fall through.  */
    case 0xc: /* 32-bit, mask shift by 8. */
3334 val <<= 8;
3335 val |= 0xFF;
3336 val = ~ val;
3337 for (i = 0; i < (full ? 4 : 2); i++)
3338 aarch64_set_vec_u32 (cpu, vd, i, val);
3339 return;
3340
3341 case 0xE: /* MOVI Dn, #mask64 */
3342 {
3343 uint64_t mask = 0;
3344
3345 for (i = 0; i < 8; i++)
3346 if (val & (1 << i))
	  mask |= (0xFFULL << (i * 8));
3348 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3349 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3350 return;
3351 }
3352
3353 case 0xf: /* FMOV Vd.2D, #fpimm. */
3354 {
3355 double u = fp_immediate_for_encoding_64 (val);
3356
3357 if (! full)
3358 HALT_UNALLOC;
3359
3360 aarch64_set_vec_double (cpu, vd, 0, u);
3361 aarch64_set_vec_double (cpu, vd, 1, u);
3362 return;
3363 }
3364
3365 default:
3366 HALT_NYI;
3367 }
3368 }
3369
3370 #define ABS(A) ((A) < 0 ? - (A) : (A))
3371
3372 static void
3373 do_vec_ABS (sim_cpu *cpu)
3374 {
3375 /* instr[31] = 0
3376 instr[30] = half(0)/full(1)
3377 instr[29,24] = 00 1110
3378 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3379 instr[21,10] = 10 0000 1011 10
3380 instr[9,5] = Vn
     instr[4,0] = Vd.  */
3382
3383 unsigned vn = INSTR (9, 5);
3384 unsigned vd = INSTR (4, 0);
3385 unsigned full = INSTR (30, 30);
3386 unsigned i;
3387
3388 NYI_assert (29, 24, 0x0E);
3389 NYI_assert (21, 10, 0x82E);
3390
3391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3392 switch (INSTR (23, 22))
3393 {
3394 case 0:
3395 for (i = 0; i < (full ? 16 : 8); i++)
3396 aarch64_set_vec_s8 (cpu, vd, i,
3397 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3398 break;
3399
3400 case 1:
3401 for (i = 0; i < (full ? 8 : 4); i++)
3402 aarch64_set_vec_s16 (cpu, vd, i,
3403 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3404 break;
3405
3406 case 2:
3407 for (i = 0; i < (full ? 4 : 2); i++)
3408 aarch64_set_vec_s32 (cpu, vd, i,
3409 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3410 break;
3411
3412 case 3:
3413 if (! full)
3414 HALT_NYI;
3415 for (i = 0; i < 2; i++)
3416 aarch64_set_vec_s64 (cpu, vd, i,
3417 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3418 break;
3419 }
3420 }
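
/* N.B. as on hardware, ABS of the most negative value wraps to
   itself: in the byte case, ABS (-128) is evaluated in int as 128
   and truncates back to -128 when stored.  */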
3421
3422 static void
3423 do_vec_ADDV (sim_cpu *cpu)
3424 {
3425 /* instr[31] = 0
3426 instr[30] = full/half selector
3427 instr[29,24] = 00 1110
3428 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3429 instr[21,10] = 11 0001 1011 10
3430 instr[9,5] = Vm
     instr[4,0] = Rd.  */
3432
3433 unsigned vm = INSTR (9, 5);
3434 unsigned rd = INSTR (4, 0);
3435 unsigned i;
3436 uint64_t val = 0;
3437 int full = INSTR (30, 30);
3438
3439 NYI_assert (29, 24, 0x0E);
3440 NYI_assert (21, 10, 0xC6E);
3441
3442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3443 switch (INSTR (23, 22))
3444 {
3445 case 0:
3446 for (i = 0; i < (full ? 16 : 8); i++)
3447 val += aarch64_get_vec_u8 (cpu, vm, i);
3448 aarch64_set_vec_u64 (cpu, rd, 0, val);
3449 return;
3450
3451 case 1:
3452 for (i = 0; i < (full ? 8 : 4); i++)
3453 val += aarch64_get_vec_u16 (cpu, vm, i);
3454 aarch64_set_vec_u64 (cpu, rd, 0, val);
3455 return;
3456
3457 case 2:
3458 if (! full)
3459 HALT_UNALLOC;
3460 for (i = 0; i < 4; i++)
3461 val += aarch64_get_vec_u32 (cpu, vm, i);
3462 aarch64_set_vec_u64 (cpu, rd, 0, val);
3463 return;
3464
3465 case 3:
3466 HALT_UNALLOC;
3467 }
3468 }
3469
3470 static void
3471 do_vec_ins_2 (sim_cpu *cpu)
3472 {
3473 /* instr[31,21] = 01001110000
3474 instr[20,18] = size & element selector
3475 instr[17,14] = 0000
3476 instr[13] = direction: to vec(0), from vec (1)
3477 instr[12,10] = 111
3478 instr[9,5] = Vm
3479 instr[4,0] = Vd. */
3480
3481 unsigned elem;
3482 unsigned vm = INSTR (9, 5);
3483 unsigned vd = INSTR (4, 0);
3484
3485 NYI_assert (31, 21, 0x270);
3486 NYI_assert (17, 14, 0);
3487 NYI_assert (12, 10, 7);
3488
3489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3490 if (INSTR (13, 13) == 1)
3491 {
3492 if (INSTR (18, 18) == 1)
3493 {
3494 /* 32-bit moves. */
3495 elem = INSTR (20, 19);
3496 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3497 aarch64_get_vec_u32 (cpu, vm, elem));
3498 }
3499 else
3500 {
3501 /* 64-bit moves. */
3502 if (INSTR (19, 19) != 1)
3503 HALT_NYI;
3504
3505 elem = INSTR (20, 20);
3506 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3507 aarch64_get_vec_u64 (cpu, vm, elem));
3508 }
3509 }
3510 else
3511 {
3512 if (INSTR (18, 18) == 1)
3513 {
3514 /* 32-bit moves. */
3515 elem = INSTR (20, 19);
3516 aarch64_set_vec_u32 (cpu, vd, elem,
3517 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3518 }
3519 else
3520 {
3521 /* 64-bit moves. */
3522 if (INSTR (19, 19) != 1)
3523 HALT_NYI;
3524
3525 elem = INSTR (20, 20);
3526 aarch64_set_vec_u64 (cpu, vd, elem,
3527 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3528 }
3529 }
3530 }
3531
3532 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3533 do \
3534 { \
3535 DST_TYPE a[N], b[N]; \
3536 \
3537 for (i = 0; i < (N); i++) \
3538 { \
3539 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3540 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3541 } \
3542 for (i = 0; i < (N); i++) \
3543 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3544 } \
3545 while (0)
3546
3547 static void
3548 do_vec_mull (sim_cpu *cpu)
3549 {
3550 /* instr[31] = 0
3551 instr[30] = lower(0)/upper(1) selector
3552 instr[29] = signed(0)/unsigned(1)
3553 instr[28,24] = 0 1110
3554 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3555 instr[21] = 1
3556 instr[20,16] = Vm
3557 instr[15,10] = 11 0000
3558 instr[9,5] = Vn
     instr[4,0] = Vd.  */
3560
3561 int unsign = INSTR (29, 29);
3562 int bias = INSTR (30, 30);
3563 unsigned vm = INSTR (20, 16);
3564 unsigned vn = INSTR ( 9, 5);
3565 unsigned vd = INSTR ( 4, 0);
3566 unsigned i;
3567
3568 NYI_assert (28, 24, 0x0E);
3569 NYI_assert (15, 10, 0x30);
3570
3571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3572 /* NB: Read source values before writing results, in case
3573 the source and destination vectors are the same. */
3574 switch (INSTR (23, 22))
3575 {
3576 case 0:
3577 if (bias)
3578 bias = 8;
3579 if (unsign)
3580 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3581 else
3582 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3583 return;
3584
3585 case 1:
3586 if (bias)
3587 bias = 4;
3588 if (unsign)
3589 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3590 else
3591 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3592 return;
3593
3594 case 2:
3595 if (bias)
3596 bias = 2;
3597 if (unsign)
3598 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3599 else
3600 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3601 return;
3602
3603 case 3:
3604 HALT_NYI;
3605 }
3606 }
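
/* The bias trick implements the "2" (upper-half) variants: e.g. for
   UMULL2 Vd.8H, Vn.16B, Vm.16B, bias is set to 8 so the macro reads
   source elements 8-15 while still writing results 0-7.  */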
3607
3608 static void
3609 do_vec_fadd (sim_cpu *cpu)
3610 {
3611 /* instr[31] = 0
3612 instr[30] = half(0)/full(1)
3613 instr[29,24] = 001110
3614 instr[23] = FADD(0)/FSUB(1)
3615 instr[22] = float (0)/double(1)
3616 instr[21] = 1
3617 instr[20,16] = Vm
3618 instr[15,10] = 110101
3619 instr[9,5] = Vn
     instr[4,0] = Vd.  */
3621
3622 unsigned vm = INSTR (20, 16);
3623 unsigned vn = INSTR (9, 5);
3624 unsigned vd = INSTR (4, 0);
3625 unsigned i;
3626 int full = INSTR (30, 30);
3627
3628 NYI_assert (29, 24, 0x0E);
3629 NYI_assert (21, 21, 1);
3630 NYI_assert (15, 10, 0x35);
3631
3632 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3633 if (INSTR (23, 23))
3634 {
3635 if (INSTR (22, 22))
3636 {
3637 if (! full)
3638 HALT_NYI;
3639
3640 for (i = 0; i < 2; i++)
3641 aarch64_set_vec_double (cpu, vd, i,
3642 aarch64_get_vec_double (cpu, vn, i)
3643 - aarch64_get_vec_double (cpu, vm, i));
3644 }
3645 else
3646 {
3647 for (i = 0; i < (full ? 4 : 2); i++)
3648 aarch64_set_vec_float (cpu, vd, i,
3649 aarch64_get_vec_float (cpu, vn, i)
3650 - aarch64_get_vec_float (cpu, vm, i));
3651 }
3652 }
3653 else
3654 {
3655 if (INSTR (22, 22))
3656 {
3657 if (! full)
3658 HALT_NYI;
3659
3660 for (i = 0; i < 2; i++)
3661 aarch64_set_vec_double (cpu, vd, i,
3662 aarch64_get_vec_double (cpu, vm, i)
3663 + aarch64_get_vec_double (cpu, vn, i));
3664 }
3665 else
3666 {
3667 for (i = 0; i < (full ? 4 : 2); i++)
3668 aarch64_set_vec_float (cpu, vd, i,
3669 aarch64_get_vec_float (cpu, vm, i)
3670 + aarch64_get_vec_float (cpu, vn, i));
3671 }
3672 }
3673 }
3674
3675 static void
3676 do_vec_add (sim_cpu *cpu)
3677 {
3678 /* instr[31] = 0
3679 instr[30] = full/half selector
3680 instr[29,24] = 001110
3681 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3682 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 100001
     instr[9,5] = Vn
     instr[4,0] = Vd.  */
3687
3688 unsigned vm = INSTR (20, 16);
3689 unsigned vn = INSTR (9, 5);
3690 unsigned vd = INSTR (4, 0);
3691 unsigned i;
3692 int full = INSTR (30, 30);
3693
3694 NYI_assert (29, 24, 0x0E);
3695 NYI_assert (21, 21, 1);
3696 NYI_assert (15, 10, 0x21);
3697
3698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3699 switch (INSTR (23, 22))
3700 {
3701 case 0:
3702 for (i = 0; i < (full ? 16 : 8); i++)
3703 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3704 + aarch64_get_vec_u8 (cpu, vm, i));
3705 return;
3706
3707 case 1:
3708 for (i = 0; i < (full ? 8 : 4); i++)
3709 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3710 + aarch64_get_vec_u16 (cpu, vm, i));
3711 return;
3712
3713 case 2:
3714 for (i = 0; i < (full ? 4 : 2); i++)
3715 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3716 + aarch64_get_vec_u32 (cpu, vm, i));
3717 return;
3718
3719 case 3:
3720 if (! full)
3721 HALT_UNALLOC;
3722 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3723 + aarch64_get_vec_u64 (cpu, vm, 0));
3724 aarch64_set_vec_u64 (cpu, vd, 1,
3725 aarch64_get_vec_u64 (cpu, vn, 1)
3726 + aarch64_get_vec_u64 (cpu, vm, 1));
3727 return;
3728 }
3729 }
3730
3731 static void
3732 do_vec_mul (sim_cpu *cpu)
3733 {
3734 /* instr[31] = 0
3735 instr[30] = full/half selector
3736 instr[29,24] = 00 1110
3737 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3738 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 10 0111
     instr[9,5] = Vn
     instr[4,0] = Vd.  */
3743
3744 unsigned vm = INSTR (20, 16);
3745 unsigned vn = INSTR (9, 5);
3746 unsigned vd = INSTR (4, 0);
3747 unsigned i;
3748 int full = INSTR (30, 30);
3749 int bias = 0;
3750
3751 NYI_assert (29, 24, 0x0E);
3752 NYI_assert (21, 21, 1);
3753 NYI_assert (15, 10, 0x27);
3754
3755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3756 switch (INSTR (23, 22))
3757 {
3758 case 0:
3759 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3760 return;
3761
3762 case 1:
3763 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3764 return;
3765
3766 case 2:
3767 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3768 return;
3769
3770 case 3:
3771 HALT_UNALLOC;
3772 }
3773 }
3774
3775 static void
3776 do_vec_MLA (sim_cpu *cpu)
3777 {
3778 /* instr[31] = 0
3779 instr[30] = full/half selector
3780 instr[29,24] = 00 1110
3781 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3782 instr[21] = 1
     instr[20,16] = Vm
     instr[15,10] = 1001 01
     instr[9,5] = Vn
     instr[4,0] = Vd.  */
3787
3788 unsigned vm = INSTR (20, 16);
3789 unsigned vn = INSTR (9, 5);
3790 unsigned vd = INSTR (4, 0);
3791 unsigned i;
3792 int full = INSTR (30, 30);
3793
3794 NYI_assert (29, 24, 0x0E);
3795 NYI_assert (21, 21, 1);
3796 NYI_assert (15, 10, 0x25);
3797
3798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3799 switch (INSTR (23, 22))
3800 {
3801 case 0:
3802 for (i = 0; i < (full ? 16 : 8); i++)
3803 aarch64_set_vec_u8 (cpu, vd, i,
3804 aarch64_get_vec_u8 (cpu, vd, i)
3805 + (aarch64_get_vec_u8 (cpu, vn, i)
3806 * aarch64_get_vec_u8 (cpu, vm, i)));
3807 return;
3808
3809 case 1:
3810 for (i = 0; i < (full ? 8 : 4); i++)
3811 aarch64_set_vec_u16 (cpu, vd, i,
3812 aarch64_get_vec_u16 (cpu, vd, i)
3813 + (aarch64_get_vec_u16 (cpu, vn, i)
3814 * aarch64_get_vec_u16 (cpu, vm, i)));
3815 return;
3816
3817 case 2:
3818 for (i = 0; i < (full ? 4 : 2); i++)
3819 aarch64_set_vec_u32 (cpu, vd, i,
3820 aarch64_get_vec_u32 (cpu, vd, i)
3821 + (aarch64_get_vec_u32 (cpu, vn, i)
3822 * aarch64_get_vec_u32 (cpu, vm, i)));
3823 return;
3824
3825 default:
3826 HALT_UNALLOC;
3827 }
3828 }
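
/* Example: if a lane of vd holds 10 while the same lanes of vn and
   vm hold 3 and 4, that destination lane becomes 10 + 3 * 4 = 22;
   the previous destination contents act as the accumulator.  */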
3829
3830 static float
3831 fmaxnm (float a, float b)
3832 {
3833 if (! isnan (a))
3834 {
3835 if (! isnan (b))
3836 return a > b ? a : b;
3837 return a;
3838 }
3839 else if (! isnan (b))
3840 return b;
3841 return a;
3842 }
3843
3844 static float
3845 fminnm (float a, float b)
3846 {
3847 if (! isnan (a))
3848 {
3849 if (! isnan (b))
3850 return a < b ? a : b;
3851 return a;
3852 }
3853 else if (! isnan (b))
3854 return b;
3855 return a;
3856 }
3857
3858 static double
3859 dmaxnm (double a, double b)
3860 {
3861 if (! isnan (a))
3862 {
3863 if (! isnan (b))
3864 return a > b ? a : b;
3865 return a;
3866 }
3867 else if (! isnan (b))
3868 return b;
3869 return a;
3870 }
3871
3872 static double
3873 dminnm (double a, double b)
3874 {
3875 if (! isnan (a))
3876 {
3877 if (! isnan (b))
3878 return a < b ? a : b;
3879 return a;
3880 }
3881 else if (! isnan (b))
3882 return b;
3883 return a;
3884 }
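
/* These four helpers give IEEE-754 minNum/maxNum behaviour (modulo
   signalling-NaN details): a NaN in one operand is ignored in favour
   of the other, e.g. fmaxnm (NAN, 1.0f) == 1.0f, and only
   NaN-vs-NaN returns NaN.  */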
3885
3886 static void
3887 do_vec_FminmaxNMP (sim_cpu *cpu)
3888 {
3889 /* instr [31] = 0
3890 instr [30] = half (0)/full (1)
3891 instr [29,24] = 10 1110
3892 instr [23] = max(0)/min(1)
3893 instr [22] = float (0)/double (1)
3894 instr [21] = 1
     instr [20,16] = Vm
     instr [15,10] = 1100 01
     instr [9,5] = Vn
     instr [4,0] = Vd.  */
3899
3900 unsigned vm = INSTR (20, 16);
3901 unsigned vn = INSTR (9, 5);
3902 unsigned vd = INSTR (4, 0);
3903 int full = INSTR (30, 30);
3904
3905 NYI_assert (29, 24, 0x2E);
3906 NYI_assert (21, 21, 1);
3907 NYI_assert (15, 10, 0x31);
3908
3909 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3910 if (INSTR (22, 22))
3911 {
3912 double (* fn)(double, double) = INSTR (23, 23)
3913 ? dminnm : dmaxnm;
3914
3915 if (! full)
3916 HALT_NYI;
3917 aarch64_set_vec_double (cpu, vd, 0,
3918 fn (aarch64_get_vec_double (cpu, vn, 0),
3919 aarch64_get_vec_double (cpu, vn, 1)));
      aarch64_set_vec_double (cpu, vd, 1,
3921 fn (aarch64_get_vec_double (cpu, vm, 0),
3922 aarch64_get_vec_double (cpu, vm, 1)));
3923 }
3924 else
3925 {
3926 float (* fn)(float, float) = INSTR (23, 23)
3927 ? fminnm : fmaxnm;
3928
3929 aarch64_set_vec_float (cpu, vd, 0,
3930 fn (aarch64_get_vec_float (cpu, vn, 0),
3931 aarch64_get_vec_float (cpu, vn, 1)));
3932 if (full)
3933 aarch64_set_vec_float (cpu, vd, 1,
3934 fn (aarch64_get_vec_float (cpu, vn, 2),
3935 aarch64_get_vec_float (cpu, vn, 3)));
3936
3937 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3938 fn (aarch64_get_vec_float (cpu, vm, 0),
3939 aarch64_get_vec_float (cpu, vm, 1)));
3940 if (full)
3941 aarch64_set_vec_float (cpu, vd, 3,
3942 fn (aarch64_get_vec_float (cpu, vm, 2),
3943 aarch64_get_vec_float (cpu, vm, 3)));
3944 }
3945 }
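/* Destination layout sketch for the pairwise reduction above, shown
   for the full-width float case (indices illustrative):
     vd[0] = fn (vn[0], vn[1]);   vd[1] = fn (vn[2], vn[3]);
     vd[2] = fn (vm[0], vm[1]);   vd[3] = fn (vm[2], vm[3]);
   i.e. the lower half comes from Vn pairs, the upper from Vm.  */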
3946
3947 static void
3948 do_vec_AND (sim_cpu *cpu)
3949 {
3950 /* instr[31] = 0
3951 instr[30] = half (0)/full (1)
3952 instr[29,21] = 001110001
3953 instr[20,16] = Vm
3954 instr[15,10] = 000111
3955 instr[9,5] = Vn
3956 instr[4,0] = Vd. */
3957
3958 unsigned vm = INSTR (20, 16);
3959 unsigned vn = INSTR (9, 5);
3960 unsigned vd = INSTR (4, 0);
3961 unsigned i;
3962 int full = INSTR (30, 30);
3963
3964 NYI_assert (29, 21, 0x071);
3965 NYI_assert (15, 10, 0x07);
3966
3967 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3968 for (i = 0; i < (full ? 4 : 2); i++)
3969 aarch64_set_vec_u32 (cpu, vd, i,
3970 aarch64_get_vec_u32 (cpu, vn, i)
3971 & aarch64_get_vec_u32 (cpu, vm, i));
3972 }
3973
3974 static void
3975 do_vec_BSL (sim_cpu *cpu)
3976 {
3977 /* instr[31] = 0
3978 instr[30] = half (0)/full (1)
3979 instr[29,21] = 101110011
3980 instr[20,16] = Vm
3981 instr[15,10] = 000111
3982 instr[9,5] = Vn
3983 instr[4,0] = Vd. */
3984
3985 unsigned vm = INSTR (20, 16);
3986 unsigned vn = INSTR (9, 5);
3987 unsigned vd = INSTR (4, 0);
3988 unsigned i;
3989 int full = INSTR (30, 30);
3990
3991 NYI_assert (29, 21, 0x173);
3992 NYI_assert (15, 10, 0x07);
3993
3994 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3995 for (i = 0; i < (full ? 16 : 8); i++)
3996 aarch64_set_vec_u8 (cpu, vd, i,
3997 ( aarch64_get_vec_u8 (cpu, vd, i)
3998 & aarch64_get_vec_u8 (cpu, vn, i))
3999 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4000 & aarch64_get_vec_u8 (cpu, vm, i)));
4001 }
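/* Worked BSL example (byte values chosen for illustration): with
   vd = 0xF0, vn = 0xAA and vm = 0xCC the new lane value is
   (0xF0 & 0xAA) | (~0xF0 & 0xCC) = 0xA0 | 0x0C = 0xAC, so vn
   supplies the bits where the old vd bit is 1 and vm the rest.  */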
4002
4003 static void
4004 do_vec_EOR (sim_cpu *cpu)
4005 {
4006 /* instr[31] = 0
4007 instr[30] = half (0)/full (1)
4008 instr[29,21] = 10 1110 001
4009 instr[20,16] = Vm
4010 instr[15,10] = 000111
4011 instr[9,5] = Vn
4012 instr[4,0] = Vd. */
4013
4014 unsigned vm = INSTR (20, 16);
4015 unsigned vn = INSTR (9, 5);
4016 unsigned vd = INSTR (4, 0);
4017 unsigned i;
4018 int full = INSTR (30, 30);
4019
4020 NYI_assert (29, 21, 0x171);
4021 NYI_assert (15, 10, 0x07);
4022
4023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4024 for (i = 0; i < (full ? 4 : 2); i++)
4025 aarch64_set_vec_u32 (cpu, vd, i,
4026 aarch64_get_vec_u32 (cpu, vn, i)
4027 ^ aarch64_get_vec_u32 (cpu, vm, i));
4028 }
4029
4030 static void
4031 do_vec_bit (sim_cpu *cpu)
4032 {
4033 /* instr[31] = 0
4034 instr[30] = half (0)/full (1)
4035 instr[29,23] = 10 1110 1
4036 instr[22] = BIT (0) / BIF (1)
4037 instr[21] = 1
4038 instr[20,16] = Vm
4039 instr[15,10] = 0001 11
4040 instr[9,5] = Vn
4041 instr[4,0] = Vd. */
4042
4043 unsigned vm = INSTR (20, 16);
4044 unsigned vn = INSTR (9, 5);
4045 unsigned vd = INSTR (4, 0);
4046 unsigned full = INSTR (30, 30);
4047 unsigned test_false = INSTR (22, 22);
4048 unsigned i;
4049
4050 NYI_assert (29, 23, 0x5D);
4051 NYI_assert (21, 21, 1);
4052 NYI_assert (15, 10, 0x07);
4053
4054 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4055 for (i = 0; i < (full ? 4 : 2); i++)
4056 {
4057 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4058 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4059 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4060 if (test_false)
4061 aarch64_set_vec_u32 (cpu, vd, i,
4062 (vd_val & vm_val) | (vn_val & ~vm_val));
4063 else
4064 aarch64_set_vec_u32 (cpu, vd, i,
4065 (vd_val & ~vm_val) | (vn_val & vm_val));
4066 }
4067 }
4068
4069 static void
4070 do_vec_ORN (sim_cpu *cpu)
4071 {
4072 /* instr[31] = 0
4073 instr[30] = half (0)/full (1)
4074 instr[29,21] = 00 1110 111
4075 instr[20,16] = Vm
4076 instr[15,10] = 00 0111
4077 instr[9,5] = Vn
4078 instr[4,0] = Vd. */
4079
4080 unsigned vm = INSTR (20, 16);
4081 unsigned vn = INSTR (9, 5);
4082 unsigned vd = INSTR (4, 0);
4083 unsigned i;
4084 int full = INSTR (30, 30);
4085
4086 NYI_assert (29, 21, 0x077);
4087 NYI_assert (15, 10, 0x07);
4088
4089 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4090 for (i = 0; i < (full ? 16 : 8); i++)
4091 aarch64_set_vec_u8 (cpu, vd, i,
4092 aarch64_get_vec_u8 (cpu, vn, i)
4093 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4094 }
4095
4096 static void
4097 do_vec_ORR (sim_cpu *cpu)
4098 {
4099 /* instr[31] = 0
4100 instr[30] = half (0)/full (1)
4101 instr[29,21] = 00 1110 101
4102 instr[20,16] = Vm
4103 instr[15,10] = 0001 11
4104 instr[9,5] = Vn
4105 instr[4,0] = Vd. */
4106
4107 unsigned vm = INSTR (20, 16);
4108 unsigned vn = INSTR (9, 5);
4109 unsigned vd = INSTR (4, 0);
4110 unsigned i;
4111 int full = INSTR (30, 30);
4112
4113 NYI_assert (29, 21, 0x075);
4114 NYI_assert (15, 10, 0x07);
4115
4116 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4117 for (i = 0; i < (full ? 16 : 8); i++)
4118 aarch64_set_vec_u8 (cpu, vd, i,
4119 aarch64_get_vec_u8 (cpu, vn, i)
4120 | aarch64_get_vec_u8 (cpu, vm, i));
4121 }
4122
4123 static void
4124 do_vec_BIC (sim_cpu *cpu)
4125 {
4126 /* instr[31] = 0
4127 instr[30] = half (0)/full (1)
4128 instr[29,21] = 00 1110 011
4129 instr[20,16] = Vm
4130 instr[15,10] = 00 0111
4131 instr[9,5] = Vn
4132 instr[4,0] = Vd. */
4133
4134 unsigned vm = INSTR (20, 16);
4135 unsigned vn = INSTR (9, 5);
4136 unsigned vd = INSTR (4, 0);
4137 unsigned i;
4138 int full = INSTR (30, 30);
4139
4140 NYI_assert (29, 21, 0x073);
4141 NYI_assert (15, 10, 0x07);
4142
4143 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4144 for (i = 0; i < (full ? 16 : 8); i++)
4145 aarch64_set_vec_u8 (cpu, vd, i,
4146 aarch64_get_vec_u8 (cpu, vn, i)
4147 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4148 }
4149
4150 static void
4151 do_vec_XTN (sim_cpu *cpu)
4152 {
4153 /* instr[31] = 0
4154 instr[30] = first part (0)/ second part (1)
4155 instr[29,24] = 00 1110
4156 instr[23,22] = size: byte(00), half(01), word (10)
4157 instr[21,10] = 1000 0100 1010
4158 instr[9,5] = Vs
4159 instr[4,0] = Vd. */
4160
4161 unsigned vs = INSTR (9, 5);
4162 unsigned vd = INSTR (4, 0);
4163 unsigned bias = INSTR (30, 30);
4164 unsigned i;
4165
4166 NYI_assert (29, 24, 0x0E);
4167 NYI_assert (21, 10, 0x84A);
4168
4169 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4170 switch (INSTR (23, 22))
4171 {
4172 case 0:
4173 for (i = 0; i < 8; i++)
4174 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4175 aarch64_get_vec_u16 (cpu, vs, i));
4176 return;
4177
4178 case 1:
4179 for (i = 0; i < 4; i++)
4180 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4181 aarch64_get_vec_u32 (cpu, vs, i));
4182 return;
4183
4184 case 2:
4185 for (i = 0; i < 2; i++)
4186 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4187 aarch64_get_vec_u64 (cpu, vs, i));
4188 return;
4189 }
4190 }
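/* Illustrative XTN2 case: for 64-bit source lanes (size == 2) with
   bias == 1, vs.d[0] and vs.d[1] are truncated to 32 bits and
   stored in vd.s[2] and vd.s[3]; the narrowing truncates rather
   than saturates, so 0x100000003 becomes 0x00000003.  */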
4191
4192 static void
4193 do_vec_maxv (sim_cpu *cpu)
4194 {
4195 /* instr[31] = 0
4196 instr[30] = half(0)/full(1)
4197 instr[29] = signed (0)/unsigned(1)
4198 instr[28,24] = 0 1110
4199 instr[23,22] = size: byte(00), half(01), word (10)
4200 instr[21] = 1
4201 instr[20,17] = 1 000
4202 instr[16] = max(0)/min(1)
4203 instr[15,10] = 1010 10
4204 instr[9,5] = V source
4205 instr[4,0] = R dest. */
4206
4207 unsigned vs = INSTR (9, 5);
4208 unsigned rd = INSTR (4, 0);
4209 unsigned full = INSTR (30, 30);
4210 unsigned i;
4211
4212 NYI_assert (28, 24, 0x0E);
4213 NYI_assert (21, 21, 1);
4214 NYI_assert (20, 17, 8);
4215 NYI_assert (15, 10, 0x2A);
4216
4217 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4218 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4219 {
4220 case 0: /* SMAXV. */
4221 {
4222 int64_t smax;
4223 switch (INSTR (23, 22))
4224 {
4225 case 0:
4226 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4227 for (i = 1; i < (full ? 16 : 8); i++)
4228 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4229 break;
4230 case 1:
4231 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4232 for (i = 1; i < (full ? 8 : 4); i++)
4233 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4234 break;
4235 case 2:
4236 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4237 for (i = 1; i < (full ? 4 : 2); i++)
4238 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4239 break;
4240 case 3:
4241 HALT_UNALLOC;
4242 }
4243 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4244 return;
4245 }
4246
4247 case 1: /* SMINV. */
4248 {
4249 int64_t smin;
4250 switch (INSTR (23, 22))
4251 {
4252 case 0:
4253 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4254 for (i = 1; i < (full ? 16 : 8); i++)
4255 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4256 break;
4257 case 1:
4258 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4259 for (i = 1; i < (full ? 8 : 4); i++)
4260 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4261 break;
4262 case 2:
4263 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4264 for (i = 1; i < (full ? 4 : 2); i++)
4265 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4266 break;
4267
4268 case 3:
4269 HALT_UNALLOC;
4270 }
4271 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4272 return;
4273 }
4274
4275 case 2: /* UMAXV. */
4276 {
4277 uint64_t umax;
4278 switch (INSTR (23, 22))
4279 {
4280 case 0:
4281 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4282 for (i = 1; i < (full ? 16 : 8); i++)
4283 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4284 break;
4285 case 1:
4286 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4287 for (i = 1; i < (full ? 8 : 4); i++)
4288 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4289 break;
4290 case 2:
4291 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4292 for (i = 1; i < (full ? 4 : 2); i++)
4293 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4294 break;
4295
4296 case 3:
4297 HALT_UNALLOC;
4298 }
4299 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4300 return;
4301 }
4302
4303 case 3: /* UMINV. */
4304 {
4305 uint64_t umin;
4306 switch (INSTR (23, 22))
4307 {
4308 case 0:
4309 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4310 for (i = 1; i < (full ? 16 : 8); i++)
4311 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4312 break;
4313 case 1:
4314 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4315 for (i = 1; i < (full ? 8 : 4); i++)
4316 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4317 break;
4318 case 2:
4319 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4320 for (i = 1; i < (full ? 4 : 2); i++)
4321 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4322 break;
4323
4324 case 3:
4325 HALT_UNALLOC;
4326 }
4327 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4328 return;
4329 }
4330 }
4331 }
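/* Selector example for the dispatch above: bit 29 (unsigned) and
   bit 16 (min) form a two-bit code, so an instruction with bit 29
   set and bit 16 clear lands in case 2 and performs UMAXV, an
   unsigned maximum across all lanes into a general register.  */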
4332
4333 static void
4334 do_vec_fminmaxV (sim_cpu *cpu)
4335 {
4336 /* instr[31,24] = 0110 1110
4337 instr[23] = max(0)/min(1)
4338 instr[22,14] = 011 0000 11
4339 instr[13,12] = nm(00)/normal(11)
4340 instr[11,10] = 10
4341 instr[9,5] = V source
4342 instr[4,0] = R dest. */
4343
4344 unsigned vs = INSTR (9, 5);
4345 unsigned rd = INSTR (4, 0);
4346 unsigned i;
4347 float res = aarch64_get_vec_float (cpu, vs, 0);
4348
4349 NYI_assert (31, 24, 0x6E);
4350 NYI_assert (22, 14, 0x0C3);
4351 NYI_assert (11, 10, 2);
4352
4353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4354 if (INSTR (23, 23))
4355 {
4356 switch (INSTR (13, 12))
4357 {
4358 case 0: /* FMINNMV. */
4359 for (i = 1; i < 4; i++)
4360 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4361 break;
4362
4363 case 3: /* FMINV. */
4364 for (i = 1; i < 4; i++)
4365 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4366 break;
4367
4368 default:
4369 HALT_NYI;
4370 }
4371 }
4372 else
4373 {
4374 switch (INSTR (13, 12))
4375 {
4376 case 0: /* FMAXNMV. */
4377 for (i = 1; i < 4; i++)
4378 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4379 break;
4380
4381 case 3: /* FMAXV. */
4382 for (i = 1; i < 4; i++)
4383 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4384 break;
4385
4386 default:
4387 HALT_NYI;
4388 }
4389 }
4390
4391 aarch64_set_FP_float (cpu, rd, res);
4392 }
4393
4394 static void
4395 do_vec_Fminmax (sim_cpu *cpu)
4396 {
4397 /* instr[31] = 0
4398 instr[30] = half(0)/full(1)
4399 instr[29,24] = 00 1110
4400 instr[23] = max(0)/min(1)
4401 instr[22] = float(0)/double(1)
4402 instr[21] = 1
4403 instr[20,16] = Vm
4404 instr[15,14] = 11
4405 instr[13,12] = nm(00)/normal(11)
4406 instr[11,10] = 01
4407 instr[9,5] = Vn
4408 instr[4,0] = Vd. */
4409
4410 unsigned vm = INSTR (20, 16);
4411 unsigned vn = INSTR (9, 5);
4412 unsigned vd = INSTR (4, 0);
4413 unsigned full = INSTR (30, 30);
4414 unsigned min = INSTR (23, 23);
4415 unsigned i;
4416
4417 NYI_assert (29, 24, 0x0E);
4418 NYI_assert (21, 21, 1);
4419 NYI_assert (15, 14, 3);
4420 NYI_assert (11, 10, 1);
4421
4422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4423 if (INSTR (22, 22))
4424 {
4425 double (* func)(double, double);
4426
4427 if (! full)
4428 HALT_NYI;
4429
4430 if (INSTR (13, 12) == 0)
4431 func = min ? dminnm : dmaxnm;
4432 else if (INSTR (13, 12) == 3)
4433 func = min ? fmin : fmax;
4434 else
4435 HALT_NYI;
4436
4437 for (i = 0; i < 2; i++)
4438 aarch64_set_vec_double (cpu, vd, i,
4439 func (aarch64_get_vec_double (cpu, vn, i),
4440 aarch64_get_vec_double (cpu, vm, i)));
4441 }
4442 else
4443 {
4444 float (* func)(float, float);
4445
4446 if (INSTR (13, 12) == 0)
4447 func = min ? fminnm : fmaxnm;
4448 else if (INSTR (13, 12) == 3)
4449 func = min ? fminf : fmaxf;
4450 else
4451 HALT_NYI;
4452
4453 for (i = 0; i < (full ? 4 : 2); i++)
4454 aarch64_set_vec_float (cpu, vd, i,
4455 func (aarch64_get_vec_float (cpu, vn, i),
4456 aarch64_get_vec_float (cpu, vm, i)));
4457 }
4458 }
4459
4460 static void
4461 do_vec_SCVTF (sim_cpu *cpu)
4462 {
4463 /* instr[31] = 0
4464 instr[30] = Q
4465 instr[29,23] = 00 1110 0
4466 instr[22] = float(0)/double(1)
4467 instr[21,10] = 10 0001 1101 10
4468 instr[9,5] = Vn
4469 instr[4,0] = Vd. */
4470
4471 unsigned vn = INSTR (9, 5);
4472 unsigned vd = INSTR (4, 0);
4473 unsigned full = INSTR (30, 30);
4474 unsigned size = INSTR (22, 22);
4475 unsigned i;
4476
4477 NYI_assert (29, 23, 0x1C);
4478 NYI_assert (21, 10, 0x876);
4479
4480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4481 if (size)
4482 {
4483 if (! full)
4484 HALT_UNALLOC;
4485
4486 for (i = 0; i < 2; i++)
4487 {
4488 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4489 aarch64_set_vec_double (cpu, vd, i, val);
4490 }
4491 }
4492 else
4493 {
4494 for (i = 0; i < (full ? 4 : 2); i++)
4495 {
4496 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4497 aarch64_set_vec_float (cpu, vd, i, val);
4498 }
4499 }
4500 }
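/* Signedness example for the conversion above (using the s32/s64
   accessors as elsewhere in this file): a 32-bit lane holding
   0xFFFFFFFF is the signed value -1 and converts to -1.0f; reading
   it through the unsigned accessor would give 4294967295.0f.  */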
4501
4502 #define VEC_CMP(SOURCE, CMP) \
4503 do \
4504 { \
4505 switch (size) \
4506 { \
4507 case 0: \
4508 for (i = 0; i < (full ? 16 : 8); i++) \
4509 aarch64_set_vec_u8 (cpu, vd, i, \
4510 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4511 CMP \
4512 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4513 ? -1 : 0); \
4514 return; \
4515 case 1: \
4516 for (i = 0; i < (full ? 8 : 4); i++) \
4517 aarch64_set_vec_u16 (cpu, vd, i, \
4518 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4519 CMP \
4520 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4521 ? -1 : 0); \
4522 return; \
4523 case 2: \
4524 for (i = 0; i < (full ? 4 : 2); i++) \
4525 aarch64_set_vec_u32 (cpu, vd, i, \
4526 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4527 CMP \
4528 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4529 ? -1 : 0); \
4530 return; \
4531 case 3: \
4532 if (! full) \
4533 HALT_UNALLOC; \
4534 for (i = 0; i < 2; i++) \
4535 aarch64_set_vec_u64 (cpu, vd, i, \
4536 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4537 CMP \
4538 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4539 ? -1ULL : 0); \
4540 return; \
4541 } \
4542 } \
4543 while (0)
4544
4545 #define VEC_CMP0(SOURCE, CMP) \
4546 do \
4547 { \
4548 switch (size) \
4549 { \
4550 case 0: \
4551 for (i = 0; i < (full ? 16 : 8); i++) \
4552 aarch64_set_vec_u8 (cpu, vd, i, \
4553 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4554 CMP 0 ? -1 : 0); \
4555 return; \
4556 case 1: \
4557 for (i = 0; i < (full ? 8 : 4); i++) \
4558 aarch64_set_vec_u16 (cpu, vd, i, \
4559 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4560 CMP 0 ? -1 : 0); \
4561 return; \
4562 case 2: \
4563 for (i = 0; i < (full ? 4 : 2); i++) \
4564 aarch64_set_vec_u32 (cpu, vd, i, \
4565 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4566 CMP 0 ? -1 : 0); \
4567 return; \
4568 case 3: \
4569 if (! full) \
4570 HALT_UNALLOC; \
4571 for (i = 0; i < 2; i++) \
4572 aarch64_set_vec_u64 (cpu, vd, i, \
4573 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4574 CMP 0 ? -1ULL : 0); \
4575 return; \
4576 } \
4577 } \
4578 while (0)
4579
4580 #define VEC_FCMP0(CMP) \
4581 do \
4582 { \
4583 if (vm != 0) \
4584 HALT_NYI; \
4585 if (INSTR (22, 22)) \
4586 { \
4587 if (! full) \
4588 HALT_NYI; \
4589 for (i = 0; i < 2; i++) \
4590 aarch64_set_vec_u64 (cpu, vd, i, \
4591 aarch64_get_vec_double (cpu, vn, i) \
4592 CMP 0.0 ? -1 : 0); \
4593 } \
4594 else \
4595 { \
4596 for (i = 0; i < (full ? 4 : 2); i++) \
4597 aarch64_set_vec_u32 (cpu, vd, i, \
4598 aarch64_get_vec_float (cpu, vn, i) \
4599 CMP 0.0 ? -1 : 0); \
4600 } \
4601 return; \
4602 } \
4603 while (0)
4604
4605 #define VEC_FCMP(CMP) \
4606 do \
4607 { \
4608 if (INSTR (22, 22)) \
4609 { \
4610 if (! full) \
4611 HALT_NYI; \
4612 for (i = 0; i < 2; i++) \
4613 aarch64_set_vec_u64 (cpu, vd, i, \
4614 aarch64_get_vec_double (cpu, vn, i) \
4615 CMP \
4616 aarch64_get_vec_double (cpu, vm, i) \
4617 ? -1 : 0); \
4618 } \
4619 else \
4620 { \
4621 for (i = 0; i < (full ? 4 : 2); i++) \
4622 aarch64_set_vec_u32 (cpu, vd, i, \
4623 aarch64_get_vec_float (cpu, vn, i) \
4624 CMP \
4625 aarch64_get_vec_float (cpu, vm, i) \
4626 ? -1 : 0); \
4627 } \
4628 return; \
4629 } \
4630 while (0)
4631
4632 static void
4633 do_vec_compare (sim_cpu *cpu)
4634 {
4635 /* instr[31] = 0
4636 instr[30] = half(0)/full(1)
4637 instr[29] = part-of-comparison-type
4638 instr[28,24] = 0 1110
4639 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4640 type of float compares: single (-0) / double (-1)
4641 instr[21] = 1
4642 instr[20,16] = Vm or 00000 (compare vs 0)
4643 instr[15,10] = part-of-comparison-type
4644 instr[9,5] = Vn
4645 instr[4,0] = Vd. */
4646
4647 int full = INSTR (30, 30);
4648 int size = INSTR (23, 22);
4649 unsigned vm = INSTR (20, 16);
4650 unsigned vn = INSTR (9, 5);
4651 unsigned vd = INSTR (4, 0);
4652 unsigned i;
4653
4654 NYI_assert (28, 24, 0x0E);
4655 NYI_assert (21, 21, 1);
4656
4657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4658 if ((INSTR (11, 11)
4659 && INSTR (14, 14))
4660 || ((INSTR (11, 11) == 0
4661 && INSTR (10, 10) == 0)))
4662 {
4663 /* A compare vs 0. */
4664 if (vm != 0)
4665 {
4666 if (INSTR (15, 10) == 0x2A)
4667 do_vec_maxv (cpu);
4668 else if (INSTR (15, 10) == 0x32
4669 || INSTR (15, 10) == 0x3E)
4670 do_vec_fminmaxV (cpu);
4671 else if (INSTR (29, 23) == 0x1C
4672 && INSTR (21, 10) == 0x876)
4673 do_vec_SCVTF (cpu);
4674 else
4675 HALT_NYI;
4676 return;
4677 }
4678 }
4679
4680 if (INSTR (14, 14))
4681 {
4682 /* A floating point compare. */
4683 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4684 | INSTR (13, 10);
4685
4686 NYI_assert (15, 15, 1);
4687
4688 switch (decode)
4689 {
4690 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4691 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4692 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4693 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4694 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4695 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4696 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4697 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4698
4699 default:
4700 HALT_NYI;
4701 }
4702 }
4703 else
4704 {
4705 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4706
4707 switch (decode)
4708 {
4709 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4710 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4711 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4712 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4713 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4714 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4715 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4716 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4717 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4718 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4719 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4720 default:
4721 if (vm == 0)
4722 HALT_NYI;
4723 do_vec_maxv (cpu);
4724 }
4725 }
4726 }
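/* Decode walkthrough (illustrative): a signed integer CMGT has
   bit 29 clear and bits [15,10] = 0b001101, so decode is 0x0D and
   VEC_CMP (s, >) runs: each destination lane becomes all-ones when
   vn > vm as a signed comparison, and zero otherwise.  */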
4727
4728 static void
4729 do_vec_SSHL (sim_cpu *cpu)
4730 {
4731 /* instr[31] = 0
4732 instr[30] = first part (0)/ second part (1)
4733 instr[29,24] = 00 1110
4734 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4735 instr[21] = 1
4736 instr[20,16] = Vm
4737 instr[15,10] = 0100 01
4738 instr[9,5] = Vn
4739 instr[4,0] = Vd. */
4740
4741 unsigned full = INSTR (30, 30);
4742 unsigned vm = INSTR (20, 16);
4743 unsigned vn = INSTR (9, 5);
4744 unsigned vd = INSTR (4, 0);
4745 unsigned i;
4746 signed int shift;
4747
4748 NYI_assert (29, 24, 0x0E);
4749 NYI_assert (21, 21, 1);
4750 NYI_assert (15, 10, 0x11);
4751
4752 /* FIXME: What is a signed shift left in this context?  */
4753
4754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4755 switch (INSTR (23, 22))
4756 {
4757 case 0:
4758 for (i = 0; i < (full ? 16 : 8); i++)
4759 {
4760 shift = aarch64_get_vec_s8 (cpu, vm, i);
4761 if (shift >= 0)
4762 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4763 << shift);
4764 else
4765 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4766 >> - shift);
4767 }
4768 return;
4769
4770 case 1:
4771 for (i = 0; i < (full ? 8 : 4); i++)
4772 {
4773 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4774 if (shift >= 0)
4775 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4776 << shift);
4777 else
4778 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4779 >> - shift);
4780 }
4781 return;
4782
4783 case 2:
4784 for (i = 0; i < (full ? 4 : 2); i++)
4785 {
4786 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4787 if (shift >= 0)
4788 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4789 << shift);
4790 else
4791 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4792 >> - shift);
4793 }
4794 return;
4795
4796 case 3:
4797 if (! full)
4798 HALT_UNALLOC;
4799 for (i = 0; i < 2; i++)
4800 {
4801 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4802 if (shift >= 0)
4803 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4804 << shift);
4805 else
4806 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4807 >> - shift);
4808 }
4809 return;
4810 }
4811 }
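/* Shift-by-register example (values illustrative): if vm.b[0]
   holds -2, lane 0 is shifted right by two, so vn.b[0] = 0x7C
   (124) produces 0x1F (31); a positive count in vm shifts left
   instead.  */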
4812
4813 static void
4814 do_vec_USHL (sim_cpu *cpu)
4815 {
4816 /* instr[31] = 0
4817 instr[30] = first part (0)/ second part (1)
4818 instr[29,24] = 10 1110
4819 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4820 instr[21] = 1
4821 instr[20,16] = Vm
4822 instr[15,10] = 0100 01
4823 instr[9,5] = Vn
4824 instr[4,0] = Vd */
4825
4826 unsigned full = INSTR (30, 30);
4827 unsigned vm = INSTR (20, 16);
4828 unsigned vn = INSTR (9, 5);
4829 unsigned vd = INSTR (4, 0);
4830 unsigned i;
4831 signed int shift;
4832
4833 NYI_assert (29, 24, 0x2E);
4834 NYI_assert (15, 10, 0x11);
4835
4836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4837 switch (INSTR (23, 22))
4838 {
4839 case 0:
4840 for (i = 0; i < (full ? 16 : 8); i++)
4841 {
4842 shift = aarch64_get_vec_s8 (cpu, vm, i);
4843 if (shift >= 0)
4844 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4845 << shift);
4846 else
4847 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4848 >> - shift);
4849 }
4850 return;
4851
4852 case 1:
4853 for (i = 0; i < (full ? 8 : 4); i++)
4854 {
4855 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4856 if (shift >= 0)
4857 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4858 << shift);
4859 else
4860 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4861 >> - shift);
4862 }
4863 return;
4864
4865 case 2:
4866 for (i = 0; i < (full ? 4 : 2); i++)
4867 {
4868 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4869 if (shift >= 0)
4870 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4871 << shift);
4872 else
4873 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4874 >> - shift);
4875 }
4876 return;
4877
4878 case 3:
4879 if (! full)
4880 HALT_UNALLOC;
4881 for (i = 0; i < 2; i++)
4882 {
4883 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4884 if (shift >= 0)
4885 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4886 << shift);
4887 else
4888 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4889 >> - shift);
4890 }
4891 return;
4892 }
4893 }
4894
4895 static void
4896 do_vec_FMLA (sim_cpu *cpu)
4897 {
4898 /* instr[31] = 0
4899 instr[30] = full/half selector
4900 instr[29,23] = 0011100
4901 instr[22] = size: 0=>float, 1=>double
4902 instr[21] = 1
4903 instr[20,16] = Vm
4904 instr[15,10] = 1100 11
4905 instr[9,5] = Vn
4906 instr[4,0] = Vd. */
4907
4908 unsigned vm = INSTR (20, 16);
4909 unsigned vn = INSTR (9, 5);
4910 unsigned vd = INSTR (4, 0);
4911 unsigned i;
4912 int full = INSTR (30, 30);
4913
4914 NYI_assert (29, 23, 0x1C);
4915 NYI_assert (21, 21, 1);
4916 NYI_assert (15, 10, 0x33);
4917
4918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4919 if (INSTR (22, 22))
4920 {
4921 if (! full)
4922 HALT_UNALLOC;
4923 for (i = 0; i < 2; i++)
4924 aarch64_set_vec_double (cpu, vd, i,
4925 aarch64_get_vec_double (cpu, vn, i) *
4926 aarch64_get_vec_double (cpu, vm, i) +
4927 aarch64_get_vec_double (cpu, vd, i));
4928 }
4929 else
4930 {
4931 for (i = 0; i < (full ? 4 : 2); i++)
4932 aarch64_set_vec_float (cpu, vd, i,
4933 aarch64_get_vec_float (cpu, vn, i) *
4934 aarch64_get_vec_float (cpu, vm, i) +
4935 aarch64_get_vec_float (cpu, vd, i));
4936 }
4937 }
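/* Note: the multiply and add above are separate C operations, so
   the result can differ in the last bit from the fused
   multiply-add the architecture specifies; using fma ()/fmaf ()
   would model FMLA rounding more faithfully.  */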
4938
4939 static void
4940 do_vec_max (sim_cpu *cpu)
4941 {
4942 /* instr[31] = 0
4943 instr[30] = full/half selector
4944 instr[29] = SMAX (0) / UMAX (1)
4945 instr[28,24] = 0 1110
4946 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4947 instr[21] = 1
4948 instr[20,16] = Vm
4949 instr[15,10] = 0110 01
4950 instr[9,5] = Vn
4951 instr[4,0] = Vd. */
4952
4953 unsigned vm = INSTR (20, 16);
4954 unsigned vn = INSTR (9, 5);
4955 unsigned vd = INSTR (4, 0);
4956 unsigned i;
4957 int full = INSTR (30, 30);
4958
4959 NYI_assert (28, 24, 0x0E);
4960 NYI_assert (21, 21, 1);
4961 NYI_assert (15, 10, 0x19);
4962
4963 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4964 if (INSTR (29, 29))
4965 {
4966 switch (INSTR (23, 22))
4967 {
4968 case 0:
4969 for (i = 0; i < (full ? 16 : 8); i++)
4970 aarch64_set_vec_u8 (cpu, vd, i,
4971 aarch64_get_vec_u8 (cpu, vn, i)
4972 > aarch64_get_vec_u8 (cpu, vm, i)
4973 ? aarch64_get_vec_u8 (cpu, vn, i)
4974 : aarch64_get_vec_u8 (cpu, vm, i));
4975 return;
4976
4977 case 1:
4978 for (i = 0; i < (full ? 8 : 4); i++)
4979 aarch64_set_vec_u16 (cpu, vd, i,
4980 aarch64_get_vec_u16 (cpu, vn, i)
4981 > aarch64_get_vec_u16 (cpu, vm, i)
4982 ? aarch64_get_vec_u16 (cpu, vn, i)
4983 : aarch64_get_vec_u16 (cpu, vm, i));
4984 return;
4985
4986 case 2:
4987 for (i = 0; i < (full ? 4 : 2); i++)
4988 aarch64_set_vec_u32 (cpu, vd, i,
4989 aarch64_get_vec_u32 (cpu, vn, i)
4990 > aarch64_get_vec_u32 (cpu, vm, i)
4991 ? aarch64_get_vec_u32 (cpu, vn, i)
4992 : aarch64_get_vec_u32 (cpu, vm, i));
4993 return;
4994
4995 case 3:
4996 HALT_UNALLOC;
4997 }
4998 }
4999 else
5000 {
5001 switch (INSTR (23, 22))
5002 {
5003 case 0:
5004 for (i = 0; i < (full ? 16 : 8); i++)
5005 aarch64_set_vec_s8 (cpu, vd, i,
5006 aarch64_get_vec_s8 (cpu, vn, i)
5007 > aarch64_get_vec_s8 (cpu, vm, i)
5008 ? aarch64_get_vec_s8 (cpu, vn, i)
5009 : aarch64_get_vec_s8 (cpu, vm, i));
5010 return;
5011
5012 case 1:
5013 for (i = 0; i < (full ? 8 : 4); i++)
5014 aarch64_set_vec_s16 (cpu, vd, i,
5015 aarch64_get_vec_s16 (cpu, vn, i)
5016 > aarch64_get_vec_s16 (cpu, vm, i)
5017 ? aarch64_get_vec_s16 (cpu, vn, i)
5018 : aarch64_get_vec_s16 (cpu, vm, i));
5019 return;
5020
5021 case 2:
5022 for (i = 0; i < (full ? 4 : 2); i++)
5023 aarch64_set_vec_s32 (cpu, vd, i,
5024 aarch64_get_vec_s32 (cpu, vn, i)
5025 > aarch64_get_vec_s32 (cpu, vm, i)
5026 ? aarch64_get_vec_s32 (cpu, vn, i)
5027 : aarch64_get_vec_s32 (cpu, vm, i));
5028 return;
5029
5030 case 3:
5031 HALT_UNALLOC;
5032 }
5033 }
5034 }
5035
5036 static void
5037 do_vec_min (sim_cpu *cpu)
5038 {
5039 /* instr[31] = 0
5040 instr[30] = full/half selector
5041 instr[29] = SMIN (0) / UMIN (1)
5042 instr[28,24] = 0 1110
5043 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5044 instr[21] = 1
5045 instr[20,16] = Vm
5046 instr[15,10] = 0110 11
5047 instr[9,5] = Vn
5048 instr[4,0] = Vd. */
5049
5050 unsigned vm = INSTR (20, 16);
5051 unsigned vn = INSTR (9, 5);
5052 unsigned vd = INSTR (4, 0);
5053 unsigned i;
5054 int full = INSTR (30, 30);
5055
5056 NYI_assert (28, 24, 0x0E);
5057 NYI_assert (21, 21, 1);
5058 NYI_assert (15, 10, 0x1B);
5059
5060 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5061 if (INSTR (29, 29))
5062 {
5063 switch (INSTR (23, 22))
5064 {
5065 case 0:
5066 for (i = 0; i < (full ? 16 : 8); i++)
5067 aarch64_set_vec_u8 (cpu, vd, i,
5068 aarch64_get_vec_u8 (cpu, vn, i)
5069 < aarch64_get_vec_u8 (cpu, vm, i)
5070 ? aarch64_get_vec_u8 (cpu, vn, i)
5071 : aarch64_get_vec_u8 (cpu, vm, i));
5072 return;
5073
5074 case 1:
5075 for (i = 0; i < (full ? 8 : 4); i++)
5076 aarch64_set_vec_u16 (cpu, vd, i,
5077 aarch64_get_vec_u16 (cpu, vn, i)
5078 < aarch64_get_vec_u16 (cpu, vm, i)
5079 ? aarch64_get_vec_u16 (cpu, vn, i)
5080 : aarch64_get_vec_u16 (cpu, vm, i));
5081 return;
5082
5083 case 2:
5084 for (i = 0; i < (full ? 4 : 2); i++)
5085 aarch64_set_vec_u32 (cpu, vd, i,
5086 aarch64_get_vec_u32 (cpu, vn, i)
5087 < aarch64_get_vec_u32 (cpu, vm, i)
5088 ? aarch64_get_vec_u32 (cpu, vn, i)
5089 : aarch64_get_vec_u32 (cpu, vm, i));
5090 return;
5091
5092 case 3:
5093 HALT_UNALLOC;
5094 }
5095 }
5096 else
5097 {
5098 switch (INSTR (23, 22))
5099 {
5100 case 0:
5101 for (i = 0; i < (full ? 16 : 8); i++)
5102 aarch64_set_vec_s8 (cpu, vd, i,
5103 aarch64_get_vec_s8 (cpu, vn, i)
5104 < aarch64_get_vec_s8 (cpu, vm, i)
5105 ? aarch64_get_vec_s8 (cpu, vn, i)
5106 : aarch64_get_vec_s8 (cpu, vm, i));
5107 return;
5108
5109 case 1:
5110 for (i = 0; i < (full ? 8 : 4); i++)
5111 aarch64_set_vec_s16 (cpu, vd, i,
5112 aarch64_get_vec_s16 (cpu, vn, i)
5113 < aarch64_get_vec_s16 (cpu, vm, i)
5114 ? aarch64_get_vec_s16 (cpu, vn, i)
5115 : aarch64_get_vec_s16 (cpu, vm, i));
5116 return;
5117
5118 case 2:
5119 for (i = 0; i < (full ? 4 : 2); i++)
5120 aarch64_set_vec_s32 (cpu, vd, i,
5121 aarch64_get_vec_s32 (cpu, vn, i)
5122 < aarch64_get_vec_s32 (cpu, vm, i)
5123 ? aarch64_get_vec_s32 (cpu, vn, i)
5124 : aarch64_get_vec_s32 (cpu, vm, i));
5125 return;
5126
5127 case 3:
5128 HALT_UNALLOC;
5129 }
5130 }
5131 }
5132
5133 static void
5134 do_vec_sub_long (sim_cpu *cpu)
5135 {
5136 /* instr[31] = 0
5137 instr[30] = lower (0) / upper (1)
5138 instr[29] = signed (0) / unsigned (1)
5139 instr[28,24] = 0 1110
5140 instr[23,22] = size: bytes (00), half (01), word (10)
5141 instr[21] = 1
5142 instr[20,16] = Vm
5143 instr[15,10] = 0010 00
5144 instr[9,5] = Vn
5145 instr[4,0] = V dest. */
5146
5147 unsigned size = INSTR (23, 22);
5148 unsigned vm = INSTR (20, 16);
5149 unsigned vn = INSTR (9, 5);
5150 unsigned vd = INSTR (4, 0);
5151 unsigned bias = 0;
5152 unsigned i;
5153
5154 NYI_assert (28, 24, 0x0E);
5155 NYI_assert (21, 21, 1);
5156 NYI_assert (15, 10, 0x08);
5157
5158 if (size == 3)
5159 HALT_UNALLOC;
5160
5161 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5162 switch (INSTR (30, 29))
5163 {
5164 case 2: /* SSUBL2. */
5165 bias = 2; /* Fall through. */
5166 case 0: /* SSUBL. */
5167 switch (size)
5168 {
5169 case 0:
5170 bias *= 4; /* Upper half starts at byte 8. */
5171 for (i = 0; i < 8; i++)
5172 aarch64_set_vec_s16 (cpu, vd, i,
5173 aarch64_get_vec_s8 (cpu, vn, i + bias)
5174 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5175 break;
5176
5177 case 1:
5178 bias *= 2;
5179 for (i = 0; i < 4; i++)
5180 aarch64_set_vec_s32 (cpu, vd, i,
5181 aarch64_get_vec_s16 (cpu, vn, i + bias)
5182 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5183 break;
5184
5185 case 2:
5186 for (i = 0; i < 2; i++)
5187 aarch64_set_vec_s64 (cpu, vd, i,
5188 aarch64_get_vec_s32 (cpu, vn, i + bias)
5189 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5190 break;
5191
5192 default:
5193 HALT_UNALLOC;
5194 }
5195 break;
5196
5197 case 3: /* USUBL2. */
5198 bias = 2; /* Fall through. */
5199 case 1: /* USUBL. */
5200 switch (size)
5201 {
5202 case 0:
5203 bias *= 4; /* Upper half starts at byte 8. */
5204 for (i = 0; i < 8; i++)
5205 aarch64_set_vec_u16 (cpu, vd, i,
5206 aarch64_get_vec_u8 (cpu, vn, i + bias)
5207 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5208 break;
5209
5210 case 1:
5211 bias *= 2;
5212 for (i = 0; i < 4; i++)
5213 aarch64_set_vec_u32 (cpu, vd, i,
5214 aarch64_get_vec_u16 (cpu, vn, i + bias)
5215 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5216 break;
5217
5218 case 2:
5219 for (i = 0; i < 2; i++)
5220 aarch64_set_vec_u64 (cpu, vd, i,
5221 aarch64_get_vec_u32 (cpu, vn, i + bias)
5222 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5223 break;
5224
5225 default:
5226 HALT_UNALLOC;
5227 }
5228 break;
5229 }
5230 }
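/* Bias example for the widening subtracts above: SSUBL2 on byte
   lanes uses bias = 2 * 4 = 8, so the eight 16-bit results are
   computed from vn.b[8..15] and vm.b[8..15]; plain SSUBL keeps
   bias 0 and consumes the lower eight bytes.  */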
5231
5232 static void
5233 do_vec_ADDP (sim_cpu *cpu)
5234 {
5235 /* instr[31] = 0
5236 instr[30] = half(0)/full(1)
5237 instr[29,24] = 00 1110
5238 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5239 instr[21] = 1
5240 instr[20,16] = Vm
5241 instr[15,10] = 1011 11
5242 instr[9,5] = Vn
5243 instr[4,0] = V dest. */
5244
5245 FRegister copy_vn;
5246 FRegister copy_vm;
5247 unsigned full = INSTR (30, 30);
5248 unsigned size = INSTR (23, 22);
5249 unsigned vm = INSTR (20, 16);
5250 unsigned vn = INSTR (9, 5);
5251 unsigned vd = INSTR (4, 0);
5252 unsigned i, range;
5253
5254 NYI_assert (29, 24, 0x0E);
5255 NYI_assert (21, 21, 1);
5256 NYI_assert (15, 10, 0x2F);
5257
5258 /* Make copies of the source registers in case vd == vn/vm. */
5259 copy_vn = cpu->fr[vn];
5260 copy_vm = cpu->fr[vm];
5261
5262 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5263 switch (size)
5264 {
5265 case 0:
5266 range = full ? 8 : 4;
5267 for (i = 0; i < range; i++)
5268 {
5269 aarch64_set_vec_u8 (cpu, vd, i,
5270 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5271 aarch64_set_vec_u8 (cpu, vd, i + range,
5272 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5273 }
5274 return;
5275
5276 case 1:
5277 range = full ? 4 : 2;
5278 for (i = 0; i < range; i++)
5279 {
5280 aarch64_set_vec_u16 (cpu, vd, i,
5281 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5282 aarch64_set_vec_u16 (cpu, vd, i + range,
5283 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5284 }
5285 return;
5286
5287 case 2:
5288 range = full ? 2 : 1;
5289 for (i = 0; i < range; i++)
5290 {
5291 aarch64_set_vec_u32 (cpu, vd, i,
5292 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5293 aarch64_set_vec_u32 (cpu, vd, i + range,
5294 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5295 }
5296 return;
5297
5298 case 3:
5299 if (! full)
5300 HALT_UNALLOC;
5301 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5302 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5303 return;
5304 }
5305 }
5306
5307 static void
5308 do_vec_UMOV (sim_cpu *cpu)
5309 {
5310 /* instr[31] = 0
5311 instr[30] = 32-bit(0)/64-bit(1)
5312 instr[29,21] = 00 1110 000
5313 instr[20,16] = size & index
5314 instr[15,10] = 0011 11
5315 instr[9,5] = V source
5316 instr[4,0] = R dest. */
5317
5318 unsigned vs = INSTR (9, 5);
5319 unsigned rd = INSTR (4, 0);
5320 unsigned index;
5321
5322 NYI_assert (29, 21, 0x070);
5323 NYI_assert (15, 10, 0x0F);
5324
5325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5326 if (INSTR (16, 16))
5327 {
5328 /* Byte transfer. */
5329 index = INSTR (20, 17);
5330 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5331 aarch64_get_vec_u8 (cpu, vs, index));
5332 }
5333 else if (INSTR (17, 17))
5334 {
5335 index = INSTR (20, 18);
5336 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5337 aarch64_get_vec_u16 (cpu, vs, index));
5338 }
5339 else if (INSTR (18, 18))
5340 {
5341 index = INSTR (20, 19);
5342 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5343 aarch64_get_vec_u32 (cpu, vs, index));
5344 }
5345 else
5346 {
5347 if (INSTR (30, 30) != 1)
5348 HALT_UNALLOC;
5349
5350 index = INSTR (20, 20);
5351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5352 aarch64_get_vec_u64 (cpu, vs, index));
5353 }
5354 }
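/* imm5 decode example for the transfer above: with instr[20,16] =
   0b01110, bit 16 is clear and bit 17 is set, so a 16-bit element
   is moved and the index is instr[20,18] = 0b011 = 3, i.e.
   Rd = Vs.h[3] zero-extended.  */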
5355
5356 static void
5357 do_vec_FABS (sim_cpu *cpu)
5358 {
5359 /* instr[31] = 0
5360 instr[30] = half(0)/full(1)
5361 instr[29,23] = 00 1110 1
5362 instr[22] = float(0)/double(1)
5363 instr[21,16] = 10 0000
5364 instr[15,10] = 1111 10
5365 instr[9,5] = Vn
5366 instr[4,0] = Vd. */
5367
5368 unsigned vn = INSTR (9, 5);
5369 unsigned vd = INSTR (4, 0);
5370 unsigned full = INSTR (30, 30);
5371 unsigned i;
5372
5373 NYI_assert (29, 23, 0x1D);
5374 NYI_assert (21, 10, 0x83E);
5375
5376 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5377 if (INSTR (22, 22))
5378 {
5379 if (! full)
5380 HALT_NYI;
5381
5382 for (i = 0; i < 2; i++)
5383 aarch64_set_vec_double (cpu, vd, i,
5384 fabs (aarch64_get_vec_double (cpu, vn, i)));
5385 }
5386 else
5387 {
5388 for (i = 0; i < (full ? 4 : 2); i++)
5389 aarch64_set_vec_float (cpu, vd, i,
5390 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5391 }
5392 }
5393
5394 static void
5395 do_vec_FCVTZS (sim_cpu *cpu)
5396 {
5397 /* instr[31] = 0
5398 instr[30] = half (0) / all (1)
5399 instr[29,23] = 00 1110 1
5400 instr[22] = single (0) / double (1)
5401 instr[21,10] = 10 0001 1011 10
5402 instr[9,5] = Rn
5403 instr[4,0] = Rd. */
5404
5405 unsigned rn = INSTR (9, 5);
5406 unsigned rd = INSTR (4, 0);
5407 unsigned full = INSTR (30, 30);
5408 unsigned i;
5409
5410 NYI_assert (31, 31, 0);
5411 NYI_assert (29, 23, 0x1D);
5412 NYI_assert (21, 10, 0x86E);
5413
5414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5415 if (INSTR (22, 22))
5416 {
5417 if (! full)
5418 HALT_UNALLOC;
5419
5420 for (i = 0; i < 2; i++)
5421 aarch64_set_vec_s64 (cpu, rd, i,
5422 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5423 }
5424 else
5425 for (i = 0; i < (full ? 4 : 2); i++)
5426 aarch64_set_vec_s32 (cpu, rd, i,
5427 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5428 }
5429
5430 static void
5431 do_vec_REV64 (sim_cpu *cpu)
5432 {
5433 /* instr[31] = 0
5434 instr[30] = full/half
5435 instr[29,24] = 00 1110
5436 instr[23,22] = size
5437 instr[21,10] = 10 0000 0000 10
5438 instr[9,5] = Rn
5439 instr[4,0] = Rd. */
5440
5441 unsigned rn = INSTR (9, 5);
5442 unsigned rd = INSTR (4, 0);
5443 unsigned size = INSTR (23, 22);
5444 unsigned full = INSTR (30, 30);
5445 unsigned i;
5446 FRegister val;
5447
5448 NYI_assert (29, 24, 0x0E);
5449 NYI_assert (21, 10, 0x802);
5450
5451 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5452 switch (size)
5453 {
5454 case 0:
5455 for (i = 0; i < (full ? 16 : 8); i++)
5456 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5457 break;
5458
5459 case 1:
5460 for (i = 0; i < (full ? 8 : 4); i++)
5461 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5462 break;
5463
5464 case 2:
5465 for (i = 0; i < (full ? 4 : 2); i++)
5466 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5467 break;
5468
5469 case 3:
5470 HALT_UNALLOC;
5471 }
5472
5473 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5474 if (full)
5475 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5476 }
5477
5478 static void
5479 do_vec_REV16 (sim_cpu *cpu)
5480 {
5481 /* instr[31] = 0
5482 instr[30] = full/half
5483 instr[29,24] = 00 1110
5484 instr[23,22] = size
5485 instr[21,10] = 10 0000 0001 10
5486 instr[9,5] = Rn
5487 instr[4,0] = Rd. */
5488
5489 unsigned rn = INSTR (9, 5);
5490 unsigned rd = INSTR (4, 0);
5491 unsigned size = INSTR (23, 22);
5492 unsigned full = INSTR (30, 30);
5493 unsigned i;
5494 FRegister val;
5495
5496 NYI_assert (29, 24, 0x0E);
5497 NYI_assert (21, 10, 0x806);
5498
5499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5500 switch (size)
5501 {
5502 case 0:
5503 for (i = 0; i < (full ? 16 : 8); i++)
5504 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5505 break;
5506
5507 default:
5508 HALT_UNALLOC;
5509 }
5510
5511 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5512 if (full)
5513 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5514 }
5515
5516 static void
5517 do_vec_op1 (sim_cpu *cpu)
5518 {
5519 /* instr[31] = 0
5520 instr[30] = half/full
5521 instr[29,24] = 00 1110
5522 instr[23,21] = ???
5523 instr[20,16] = Vm
5524 instr[15,10] = sub-opcode
5525 instr[9,5] = Vn
5526 instr[4,0] = Vd */
5527 NYI_assert (29, 24, 0x0E);
5528
5529 if (INSTR (21, 21) == 0)
5530 {
5531 if (INSTR (23, 22) == 0)
5532 {
5533 if (INSTR (30, 30) == 1
5534 && INSTR (17, 14) == 0
5535 && INSTR (12, 10) == 7)
5536 return do_vec_ins_2 (cpu);
5537
5538 switch (INSTR (15, 10))
5539 {
5540 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5541 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5542 case 0x07: do_vec_INS (cpu); return;
5543 case 0x0A: do_vec_TRN (cpu); return;
5544
5545 case 0x0F:
5546 if (INSTR (17, 16) == 0)
5547 {
5548 do_vec_MOV_into_scalar (cpu);
5549 return;
5550 }
5551 break;
5552
5553 case 0x00:
5554 case 0x08:
5555 case 0x10:
5556 case 0x18:
5557 do_vec_TBL (cpu); return;
5558
5559 case 0x06:
5560 case 0x16:
5561 do_vec_UZP (cpu); return;
5562
5563 case 0x0E:
5564 case 0x1E:
5565 do_vec_ZIP (cpu); return;
5566
5567 default:
5568 HALT_NYI;
5569 }
5570 }
5571
5572 switch (INSTR (13, 10))
5573 {
5574 case 0x6: do_vec_UZP (cpu); return;
5575 case 0xE: do_vec_ZIP (cpu); return;
5576 case 0xA: do_vec_TRN (cpu); return;
5577 case 0xF: do_vec_UMOV (cpu); return;
5578 default: HALT_NYI;
5579 }
5580 }
5581
5582 switch (INSTR (15, 10))
5583 {
5584 case 0x02: do_vec_REV64 (cpu); return;
5585 case 0x06: do_vec_REV16 (cpu); return;
5586
5587 case 0x07:
5588 switch (INSTR (23, 21))
5589 {
5590 case 1: do_vec_AND (cpu); return;
5591 case 3: do_vec_BIC (cpu); return;
5592 case 5: do_vec_ORR (cpu); return;
5593 case 7: do_vec_ORN (cpu); return;
5594 default: HALT_NYI;
5595 }
5596
5597 case 0x08: do_vec_sub_long (cpu); return;
5598 case 0x0a: do_vec_XTN (cpu); return;
5599 case 0x11: do_vec_SSHL (cpu); return;
5600 case 0x19: do_vec_max (cpu); return;
5601 case 0x1B: do_vec_min (cpu); return;
5602 case 0x21: do_vec_add (cpu); return;
5603 case 0x25: do_vec_MLA (cpu); return;
5604 case 0x27: do_vec_mul (cpu); return;
5605 case 0x2F: do_vec_ADDP (cpu); return;
5606 case 0x30: do_vec_mull (cpu); return;
5607 case 0x33: do_vec_FMLA (cpu); return;
5608 case 0x35: do_vec_fadd (cpu); return;
5609
5610 case 0x2E:
5611 switch (INSTR (20, 16))
5612 {
5613 case 0x00: do_vec_ABS (cpu); return;
5614 case 0x01: do_vec_FCVTZS (cpu); return;
5615 case 0x11: do_vec_ADDV (cpu); return;
5616 default: HALT_NYI;
5617 }
5618
5619 case 0x31:
5620 case 0x3B:
5621 do_vec_Fminmax (cpu); return;
5622
5623 case 0x0D:
5624 case 0x0F:
5625 case 0x22:
5626 case 0x23:
5627 case 0x26:
5628 case 0x2A:
5629 case 0x32:
5630 case 0x36:
5631 case 0x39:
5632 case 0x3A:
5633 do_vec_compare (cpu); return;
5634
5635 case 0x3E:
5636 do_vec_FABS (cpu); return;
5637
5638 default:
5639 HALT_NYI;
5640 }
5641 }
5642
5643 static void
5644 do_vec_xtl (sim_cpu *cpu)
5645 {
5646 /* instr[31] = 0
5647 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5648 instr[28,22] = 0 1111 00
5649 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5650 instr[15,10] = 1010 01
5651 instr[9,5] = V source
5652 instr[4,0] = V dest. */
5653
5654 unsigned vs = INSTR (9, 5);
5655 unsigned vd = INSTR (4, 0);
5656 unsigned i, shift, bias = 0;
5657
5658 NYI_assert (28, 22, 0x3C);
5659 NYI_assert (15, 10, 0x29);
5660
5661 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5662 switch (INSTR (30, 29))
5663 {
5664 case 2: /* SXTL2, SSHLL2. */
5665 bias = 2; /* Fall through. */
5666 case 0: /* SXTL, SSHLL. */
5667 if (INSTR (21, 21))
5668 {
5669 int64_t val1, val2;
5670
5671 shift = INSTR (20, 16);
5672 /* Get the source values before setting the destination values
5673 in case the source and destination are the same. */
5674 val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5675 val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5676 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5677 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5678 }
5679 else if (INSTR (20, 20))
5680 {
5681 int32_t v[4];
5683
5684 shift = INSTR (19, 16);
5685 bias *= 2;
5686 for (i = 0; i < 4; i++)
5687 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5688 for (i = 0; i < 4; i++)
5689 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5690 }
5691 else
5692 {
5693 int16_t v[8];
5694 NYI_assert (19, 19, 1);
5695
5696 shift = INSTR (18, 16);
5697 bias *= 4; /* Upper half starts at byte 8. */
5698 for (i = 0; i < 8; i++)
5699 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5700 for (i = 0; i < 8; i++)
5701 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5702 }
5703 return;
5704
5705 case 3: /* UXTL2, USHLL2. */
5706 bias = 2; /* Fall through. */
5707 case 1: /* UXTL, USHLL. */
5708 if (INSTR (21, 21))
5709 {
5710 uint64_t v1, v2;
5711 shift = INSTR (20, 16);
5712 v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5713 v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5714 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5715 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5716 }
5717 else if (INSTR (20, 20))
5718 {
5719 uint32_t v[4];
5720 shift = INSTR (19, 16);
5721 bias *= 2;
5722 for (i = 0; i < 4; i++)
5723 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5724 for (i = 0; i < 4; i++)
5725 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5726 }
5727 else
5728 {
5729 uint16_t v[8];
5730 NYI_assert (19, 19, 1);
5731
5732 shift = INSTR (18, 16);
5733 bias *= 4; /* Upper half starts at byte 8. */
5734 for (i = 0; i < 8; i++)
5735 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5736 for (i = 0; i < 8; i++)
5737 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5738 }
5739 return;
5740 }
5741 }
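/* Field example for the size/shift decode above: SSHLL with
   instr[21,16] = 0b010011 has bit 21 clear and bit 20 set, so the
   source lanes are 16-bit with shift = instr[19,16] = 3; each
   vs.h element is sign-extended to 32 bits and shifted left by
   three.  */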
5742
5743 static void
5744 do_vec_SHL (sim_cpu *cpu)
5745 {
5746 /* instr [31] = 0
5747 instr [30] = half(0)/full(1)
5748 instr [29,23] = 001 1110
5749 instr [22,16] = size and shift amount
5750 instr [15,10] = 01 0101
5751 instr [9, 5] = Vs
5752 instr [4, 0] = Vd. */
5753
5754 int shift;
5755 int full = INSTR (30, 30);
5756 unsigned vs = INSTR (9, 5);
5757 unsigned vd = INSTR (4, 0);
5758 unsigned i;
5759
5760 NYI_assert (29, 23, 0x1E);
5761 NYI_assert (15, 10, 0x15);
5762
5763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5764 if (INSTR (22, 22))
5765 {
5766 shift = INSTR (21, 16);
5767
5768 if (full == 0)
5769 HALT_UNALLOC;
5770
5771 for (i = 0; i < 2; i++)
5772 {
5773 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5774 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5775 }
5776
5777 return;
5778 }
5779
5780 if (INSTR (21, 21))
5781 {
5782 shift = INSTR (20, 16);
5783
5784 for (i = 0; i < (full ? 4 : 2); i++)
5785 {
5786 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5787 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5788 }
5789
5790 return;
5791 }
5792
5793 if (INSTR (20, 20))
5794 {
5795 shift = INSTR (19, 16);
5796
5797 for (i = 0; i < (full ? 8 : 4); i++)
5798 {
5799 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5800 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5801 }
5802
5803 return;
5804 }
5805
5806 if (INSTR (19, 19) == 0)
5807 HALT_UNALLOC;
5808
5809 shift = INSTR (18, 16);
5810
5811 for (i = 0; i < (full ? 16 : 8); i++)
5812 {
5813 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5814 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5815 }
5816 }
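/* Shift decode example for SHL above: the leading one in the
   immediate field selects the lane size and the bits below it are
   the shift count, e.g. bits [22,16] = 0b0100011 select 32-bit
   lanes (bit 21 set) with shift = bits [20,16] = 3.  */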
5817
5818 static void
5819 do_vec_SSHR_USHR (sim_cpu *cpu)
5820 {
5821 /* instr [31] = 0
5822 instr [30] = half(0)/full(1)
5823 instr [29] = signed(0)/unsigned(1)
5824 instr [28,23] = 0 1111 0
5825 instr [22,16] = size and shift amount
5826 instr [15,10] = 0000 01
5827 instr [9, 5] = Vs
5828 instr [4, 0] = Vd. */
5829
5830 int full = INSTR (30, 30);
5831 int sign = ! INSTR (29, 29);
5832 unsigned shift = INSTR (22, 16);
5833 unsigned vs = INSTR (9, 5);
5834 unsigned vd = INSTR (4, 0);
5835 unsigned i;
5836
5837 NYI_assert (28, 23, 0x1E);
5838 NYI_assert (15, 10, 0x01);
5839
5840 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5841 if (INSTR (22, 22))
5842 {
5843 shift = 128 - shift;
5844
5845 if (full == 0)
5846 HALT_UNALLOC;
5847
5848 if (sign)
5849 for (i = 0; i < 2; i++)
5850 {
5851 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5852 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5853 }
5854 else
5855 for (i = 0; i < 2; i++)
5856 {
5857 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5858 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5859 }
5860
5861 return;
5862 }
5863
5864 if (INSTR (21, 21))
5865 {
5866 shift = 64 - shift;
5867
5868 if (sign)
5869 for (i = 0; i < (full ? 4 : 2); i++)
5870 {
5871 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5872 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5873 }
5874 else
5875 for (i = 0; i < (full ? 4 : 2); i++)
5876 {
5877 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5878 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5879 }
5880
5881 return;
5882 }
5883
5884 if (INSTR (20, 20))
5885 {
5886 shift = 32 - shift;
5887
5888 if (sign)
5889 for (i = 0; i < (full ? 8 : 4); i++)
5890 {
5891 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5892 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5893 }
5894 else
5895 for (i = 0; i < (full ? 8 : 4); i++)
5896 {
5897 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5898 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5899 }
5900
5901 return;
5902 }
5903
5904 if (INSTR (19, 19) == 0)
5905 HALT_UNALLOC;
5906
5907 shift = 16 - shift;
5908
5909 if (sign)
5910 for (i = 0; i < (full ? 16 : 8); i++)
5911 {
5912 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5913 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5914 }
5915 else
5916 for (i = 0; i < (full ? 16 : 8); i++)
5917 {
5918 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5919 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5920 }
5921 }
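/* Right-shift decode example: here the field value is subtracted
   from twice the lane size, so bits [22,16] = 0b0100001 (33) with
   bit 21 set select 32-bit lanes and a shift of 64 - 33 = 31.  */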
5922
5923 static void
5924 do_vec_MUL_by_element (sim_cpu *cpu)
5925 {
5926 /* instr[31] = 0
5927 instr[30] = half/full
5928 instr[29,24] = 00 1111
5929 instr[23,22] = size
5930 instr[21] = L
5931 instr[20] = M
5932 instr[19,16] = m
5933 instr[15,12] = 1000
5934 instr[11] = H
5935 instr[10] = 0
5936 instr[9,5] = Vn
5937 instr[4,0] = Vd */
5938
5939 unsigned full = INSTR (30, 30);
5940 unsigned L = INSTR (21, 21);
5941 unsigned H = INSTR (11, 11);
5942 unsigned vn = INSTR (9, 5);
5943 unsigned vd = INSTR (4, 0);
5944 unsigned size = INSTR (23, 22);
5945 unsigned index;
5946 unsigned vm;
5947 unsigned e;
5948
5949 NYI_assert (29, 24, 0x0F);
5950 NYI_assert (15, 12, 0x8);
5951 NYI_assert (10, 10, 0);
5952
5953 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5954 switch (size)
5955 {
5956 case 1:
5957 {
5958 /* 16 bit products. */
5959 uint16_t product;
5960 uint16_t element1;
5961 uint16_t element2;
5962
5963 index = (H << 2) | (L << 1) | INSTR (20, 20);
5964 vm = INSTR (19, 16);
5965 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5966
5967 for (e = 0; e < (full ? 8 : 4); e ++)
5968 {
5969 element1 = aarch64_get_vec_u16 (cpu, vn, e);
5970 product = element1 * element2;
5971 aarch64_set_vec_u16 (cpu, vd, e, product);
5972 }
5973 }
5974 break;
5975
5976 case 2:
5977 {
5978 /* 32 bit products. */
5979 uint32_t product;
5980 uint32_t element1;
5981 uint32_t element2;
5982
5983 index = (H << 1) | L;
5984 vm = INSTR (20, 16);
5985 element2 = aarch64_get_vec_u32 (cpu, vm, index);
5986
5987 for (e = 0; e < (full ? 4 : 2); e ++)
5988 {
5989 element1 = aarch64_get_vec_u32 (cpu, vn, e);
5990 product = element1 * element2;
5991 aarch64_set_vec_u32 (cpu, vd, e, product);
5992 }
5993 }
5994 break;
5995
5996 default:
5997 HALT_UNALLOC;
5998 }
5999 }
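/* Index example for the 16-bit case above: the element number is
   H:L:M, so with H = 1, L = 0 and M = 1 every vn.h lane is
   multiplied by vm.h[5], where vm names one of the first sixteen
   vector registers.  */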
6000
6001 static void
6002 do_FMLA_by_element (sim_cpu *cpu)
6003 {
6004 /* instr[31] = 0
6005 instr[30] = half/full
6006 instr[29,23] = 00 1111 1
6007 instr[22] = size
6008 instr[21] = L
6009 instr[20,16] = m
6010 instr[15,12] = 0001
6011 instr[11] = H
6012 instr[10] = 0
6013 instr[9,5] = Vn
6014 instr[4,0] = Vd */
6015
6016 unsigned full = INSTR (30, 30);
6017 unsigned size = INSTR (22, 22);
6018 unsigned L = INSTR (21, 21);
6019 unsigned vm = INSTR (20, 16);
6020 unsigned H = INSTR (11, 11);
6021 unsigned vn = INSTR (9, 5);
6022 unsigned vd = INSTR (4, 0);
6023 unsigned e;
6024
6025 NYI_assert (29, 23, 0x1F);
6026 NYI_assert (15, 12, 0x1);
6027 NYI_assert (10, 10, 0);
6028
6029 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6030 if (size)
6031 {
6032 double element1, element2;
6033
6034 if (! full || L)
6035 HALT_UNALLOC;
6036
6037 element2 = aarch64_get_vec_double (cpu, vm, H);
6038
6039 for (e = 0; e < 2; e++)
6040 {
6041 element1 = aarch64_get_vec_double (cpu, vn, e);
6042 element1 *= element2;
6043 element1 += aarch64_get_vec_double (cpu, vd, e);
6044 aarch64_set_vec_double (cpu, vd, e, element1);
6045 }
6046 }
6047 else
6048 {
6049 float element1;
6050 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6051
6052 for (e = 0; e < (full ? 4 : 2); e++)
6053 {
6054 element1 = aarch64_get_vec_float (cpu, vn, e);
6055 element1 *= element2;
6056 element1 += aarch64_get_vec_float (cpu, vd, e);
6057 aarch64_set_vec_float (cpu, vd, e, element1);
6058 }
6059 }
6060 }
6061
6062 static void
6063 do_vec_op2 (sim_cpu *cpu)
6064 {
6065 /* instr[31] = 0
6066 instr[30] = half/full
6067 instr[29,24] = 00 1111
6068 instr[23] = ?
6069 instr[22,16] = element size & index
6070 instr[15,10] = sub-opcode
6071 instr[9,5] = Vm
6072 instr[4,0] = Vd */
6073
6074 NYI_assert (29, 24, 0x0F);
6075
6076 if (INSTR (23, 23) != 0)
6077 {
6078 switch (INSTR (15, 10))
6079 {
6080 case 0x04:
6081 case 0x06:
6082 do_FMLA_by_element (cpu);
6083 return;
6084
6085 case 0x20:
6086 case 0x22:
6087 do_vec_MUL_by_element (cpu);
6088 return;
6089
6090 default:
6091 HALT_NYI;
6092 }
6093 }
6094 else
6095 {
6096 switch (INSTR (15, 10))
6097 {
6098 case 0x01: do_vec_SSHR_USHR (cpu); return;
6099 case 0x15: do_vec_SHL (cpu); return;
6100 case 0x20:
6101 case 0x22: do_vec_MUL_by_element (cpu); return;
6102 case 0x29: do_vec_xtl (cpu); return;
6103 default: HALT_NYI;
6104 }
6105 }
6106 }
6107
6108 static void
6109 do_vec_neg (sim_cpu *cpu)
6110 {
6111 /* instr[31] = 0
6112 instr[30] = full(1)/half(0)
6113 instr[29,24] = 10 1110
6114 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6115 instr[21,10] = 1000 0010 1110
6116 instr[9,5] = Vs
6117 instr[4,0] = Vd */
6118
6119 int full = INSTR (30, 30);
6120 unsigned vs = INSTR (9, 5);
6121 unsigned vd = INSTR (4, 0);
6122 unsigned i;
6123
6124 NYI_assert (29, 24, 0x2E);
6125 NYI_assert (21, 10, 0x82E);
6126
6127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6128 switch (INSTR (23, 22))
6129 {
6130 case 0:
6131 for (i = 0; i < (full ? 16 : 8); i++)
6132 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6133 return;
6134
6135 case 1:
6136 for (i = 0; i < (full ? 8 : 4); i++)
6137 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6138 return;
6139
6140 case 2:
6141 for (i = 0; i < (full ? 4 : 2); i++)
6142 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6143 return;
6144
6145 case 3:
6146 if (! full)
6147 HALT_UNALLOC;
6148 for (i = 0; i < 2; i++)
6149 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6150 return;
6151 }
6152 }
6153
6154 static void
6155 do_vec_sqrt (sim_cpu *cpu)
6156 {
6157 /* instr[31] = 0
6158 instr[30] = full(1)/half(0)
6159 instr[29,23] = 101 1101
6160 instr[22] = single(0)/double(1)
6161 instr[21,10] = 1000 0111 1110
6162 instr[9,5] = Vs
6163 instr[4,0] = Vd. */
6164
6165 int full = INSTR (30, 30);
6166 unsigned vs = INSTR (9, 5);
6167 unsigned vd = INSTR (4, 0);
6168 unsigned i;
6169
6170 NYI_assert (29, 23, 0x5B);
6171 NYI_assert (21, 10, 0x87E);
6172
6173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6174 if (INSTR (22, 22) == 0)
6175 for (i = 0; i < (full ? 4 : 2); i++)
6176 aarch64_set_vec_float (cpu, vd, i,
6177 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6178 else
6179 for (i = 0; i < 2; i++)
6180 aarch64_set_vec_double (cpu, vd, i,
6181 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6182 }
6183
6184 static void
6185 do_vec_mls_indexed (sim_cpu *cpu)
6186 {
6187 /* instr[31] = 0
6188 instr[30] = half(0)/full(1)
6189 instr[29,24] = 10 1111
6190 instr[23,22] = 16-bit(01)/32-bit(10)
6191 instr[11,21,20] = index H:L:M (if 16-bit)
6192 instr[11,21] = index H:L (if 32-bit)
6193 instr[20,16] = Vm ([19,16] if 16-bit)
6194 instr[15,12] = 0100
6195 instr[11] = part of index
6196 instr[10] = 0
6197 instr[9,5] = Vs
6198 instr[4,0] = Vd. */
6199
6200 int full = INSTR (30, 30);
6201 unsigned vs = INSTR (9, 5);
6202 unsigned vd = INSTR (4, 0);
6203 unsigned vm = INSTR (20, 16);
6204 unsigned i;
6205
6206 NYI_assert (15, 12, 4);
6207 NYI_assert (10, 10, 0);
6208
6209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6210 switch (INSTR (23, 22))
6211 {
6212 case 1:
6213 {
6214 unsigned elem;
6215 uint16_t val;
6216
6217 /* Bit 20 is the M index bit here, so only the low four
6218 bits of the vm field name the register. */
6219 elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1)
6220 | INSTR (20, 20);
6221 val = aarch64_get_vec_u16 (cpu, vm & 0xF, elem);
6222
6223 for (i = 0; i < (full ? 8 : 4); i++)
6224 aarch64_set_vec_u16 (cpu, vd, i,
6225 aarch64_get_vec_u16 (cpu, vd, i) -
6226 (aarch64_get_vec_u16 (cpu, vs, i) * val));
6227 return;
6228 }
6229
6230 case 2:
6231 {
6232 unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
6233 uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6234
6235 for (i = 0; i < (full ? 4 : 2); i++)
6236 aarch64_set_vec_u32 (cpu, vd, i,
6237 aarch64_get_vec_u32 (cpu, vd, i) -
6238 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6239 return;
6240 }
6241
6242 case 0:
6243 case 3:
6244 default:
6245 HALT_NYI;
6246 }
6247 }
6248
6249 static void
6250 do_vec_SUB (sim_cpu *cpu)
6251 {
6252 /* instr [31] = 0
6253 instr [30] = half(0)/full(1)
6254 instr [29,24] = 10 1110
6255 instr [23,22] = size: byte(00), half(01), word (10), long (11)
6256 instr [21] = 1
6257 instr [20,16] = Vm
6258 instr [15,10] = 10 0001
6259 instr [9, 5] = Vn
6260 instr [4, 0] = Vd. */
6261
6262 unsigned full = INSTR (30, 30);
6263 unsigned vm = INSTR (20, 16);
6264 unsigned vn = INSTR (9, 5);
6265 unsigned vd = INSTR (4, 0);
6266 unsigned i;
6267
6268 NYI_assert (29, 24, 0x2E);
6269 NYI_assert (21, 21, 1);
6270 NYI_assert (15, 10, 0x21);
6271
6272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6273 switch (INSTR (23, 22))
6274 {
6275 case 0:
6276 for (i = 0; i < (full ? 16 : 8); i++)
6277 aarch64_set_vec_s8 (cpu, vd, i,
6278 aarch64_get_vec_s8 (cpu, vn, i)
6279 - aarch64_get_vec_s8 (cpu, vm, i));
6280 return;
6281
6282 case 1:
6283 for (i = 0; i < (full ? 8 : 4); i++)
6284 aarch64_set_vec_s16 (cpu, vd, i,
6285 aarch64_get_vec_s16 (cpu, vn, i)
6286 - aarch64_get_vec_s16 (cpu, vm, i));
6287 return;
6288
6289 case 2:
6290 for (i = 0; i < (full ? 4 : 2); i++)
6291 aarch64_set_vec_s32 (cpu, vd, i,
6292 aarch64_get_vec_s32 (cpu, vn, i)
6293 - aarch64_get_vec_s32 (cpu, vm, i));
6294 return;
6295
6296 case 3:
6297 if (full == 0)
6298 HALT_UNALLOC;
6299
6300 for (i = 0; i < 2; i++)
6301 aarch64_set_vec_s64 (cpu, vd, i,
6302 aarch64_get_vec_s64 (cpu, vn, i)
6303 - aarch64_get_vec_s64 (cpu, vm, i));
6304 return;
6305 }
6306 }
6307
6308 static void
6309 do_vec_MLS (sim_cpu *cpu)
6310 {
6311 /* instr [31] = 0
6312 instr [30] = half(0)/full(1)
6313 instr [29,24] = 10 1110
6314 instr [23,22] = size: byte(00), half(01), word (10)
6315 instr [21] = 1
6316 instr [20,16] = Vm
6317 instr [15,10] = 10 0101
6318 instr [9, 5] = Vn
6319 instr [4, 0] = Vd. */
6320
6321 unsigned full = INSTR (30, 30);
6322 unsigned vm = INSTR (20, 16);
6323 unsigned vn = INSTR (9, 5);
6324 unsigned vd = INSTR (4, 0);
6325 unsigned i;
6326
6327 NYI_assert (29, 24, 0x2E);
6328 NYI_assert (21, 21, 1);
6329 NYI_assert (15, 10, 0x25);
6330
6331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6332 switch (INSTR (23, 22))
6333 {
6334 case 0:
6335 for (i = 0; i < (full ? 16 : 8); i++)
6336 aarch64_set_vec_u8 (cpu, vd, i,
6337 aarch64_get_vec_u8 (cpu, vd, i)
6338 - (aarch64_get_vec_u8 (cpu, vn, i)
6339 * aarch64_get_vec_u8 (cpu, vm, i)));
6340 return;
6341
6342 case 1:
6343 for (i = 0; i < (full ? 8 : 4); i++)
6344 aarch64_set_vec_u16 (cpu, vd, i,
6345 aarch64_get_vec_u16 (cpu, vd, i)
6346 - (aarch64_get_vec_u16 (cpu, vn, i)
6347 * aarch64_get_vec_u16 (cpu, vm, i)));
6348 return;
6349
6350 case 2:
6351 for (i = 0; i < (full ? 4 : 2); i++)
6352 aarch64_set_vec_u32 (cpu, vd, i,
6353 aarch64_get_vec_u32 (cpu, vd, i)
6354 - (aarch64_get_vec_u32 (cpu, vn, i)
6355 * aarch64_get_vec_u32 (cpu, vm, i)));
6356 return;
6357
6358 default:
6359 HALT_UNALLOC;
6360 }
6361 }
6362
6363 static void
6364 do_vec_FDIV (sim_cpu *cpu)
6365 {
6366 /* instr [31] = 0
6367 instr [30] = half(0)/full(1)
6368 instr [29,23] = 10 1110 0
6369 instr [22] = float(0)/double(1)
6370 instr [21] = 1
6371 instr [20,16] = Vm
6372 instr [15,10] = 1111 11
6373 instr [9, 5] = Vn
6374 instr [4, 0] = Vd. */
6375
6376 unsigned full = INSTR (30, 30);
6377 unsigned vm = INSTR (20, 16);
6378 unsigned vn = INSTR (9, 5);
6379 unsigned vd = INSTR (4, 0);
6380 unsigned i;
6381
6382 NYI_assert (29, 23, 0x5C);
6383 NYI_assert (21, 21, 1);
6384 NYI_assert (15, 10, 0x3F);
6385
6386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6387 if (INSTR (22, 22))
6388 {
6389 if (! full)
6390 HALT_UNALLOC;
6391
6392 for (i = 0; i < 2; i++)
6393 aarch64_set_vec_double (cpu, vd, i,
6394 aarch64_get_vec_double (cpu, vn, i)
6395 / aarch64_get_vec_double (cpu, vm, i));
6396 }
6397 else
6398 for (i = 0; i < (full ? 4 : 2); i++)
6399 aarch64_set_vec_float (cpu, vd, i,
6400 aarch64_get_vec_float (cpu, vn, i)
6401 / aarch64_get_vec_float (cpu, vm, i));
6402 }
6403
6404 static void
6405 do_vec_FMUL (sim_cpu *cpu)
6406 {
6407 /* instr [31] = 0
6408 instr [30] = half(0)/full(1)
6409 instr [29,23] = 10 1110 0
6410 instr [22] = float(0)/double(1)
6411 instr [21] = 1
6412 instr [20,16] = Vm
6413 instr [15,10] = 1101 11
6414 instr [9, 5] = Vn
6415 instr [4, 0] = Vd. */
6416
6417 unsigned full = INSTR (30, 30);
6418 unsigned vm = INSTR (20, 16);
6419 unsigned vn = INSTR (9, 5);
6420 unsigned vd = INSTR (4, 0);
6421 unsigned i;
6422
6423 NYI_assert (29, 23, 0x5C);
6424 NYI_assert (21, 21, 1);
6425 NYI_assert (15, 10, 0x37);
6426
6427 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6428 if (INSTR (22, 22))
6429 {
6430 if (! full)
6431 HALT_UNALLOC;
6432
6433 for (i = 0; i < 2; i++)
6434 aarch64_set_vec_double (cpu, vd, i,
6435 aarch64_get_vec_double (cpu, vn, i)
6436 * aarch64_get_vec_double (cpu, vm, i));
6437 }
6438 else
6439 for (i = 0; i < (full ? 4 : 2); i++)
6440 aarch64_set_vec_float (cpu, vd, i,
6441 aarch64_get_vec_float (cpu, vn, i)
6442 * aarch64_get_vec_float (cpu, vm, i));
6443 }
6444
6445 static void
6446 do_vec_FADDP (sim_cpu *cpu)
6447 {
6448 /* instr [31] = 0
6449 instr [30] = half(0)/full(1)
6450 instr [29,23] = 10 1110 0
6451 instr [22] = float(0)/double(1)
6452 instr [21] = 1
6453 instr [20,16] = Vm
6454 instr [15,10] = 1101 01
6455 instr [9, 5] = Vn
6456 instr [4, 0] = Vd. */
6457
6458 unsigned full = INSTR (30, 30);
6459 unsigned vm = INSTR (20, 16);
6460 unsigned vn = INSTR (9, 5);
6461 unsigned vd = INSTR (4, 0);
6462
6463 NYI_assert (29, 23, 0x5C);
6464 NYI_assert (21, 21, 1);
6465 NYI_assert (15, 10, 0x35);
6466
6467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6468 if (INSTR (22, 22))
6469 {
6470 /* Extract values before adding them in case vd == vn/vm. */
6471 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6472 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6473 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6474 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6475
6476 if (! full)
6477 HALT_UNALLOC;
6478
6479 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6480 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6481 }
6482 else
6483 {
6484 /* Extract values before adding them in case vd == vn/vm. */
6485 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6486 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6487 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6488 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6489
6490 if (full)
6491 {
6492 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6493 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6494 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6495 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6496
6497 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6498 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6499 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6500 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6501 }
6502 else
6503 {
6504 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6505 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6506 }
6507 }
6508 }
6509
6510 static void
6511 do_vec_FSQRT (sim_cpu *cpu)
6512 {
6513 /* instr[31] = 0
6514 instr[30] = half(0)/full(1)
6515 instr[29,23] = 10 1110 1
6516 instr[22] = single(0)/double(1)
6517 instr[21,10] = 10 0001 1111 10
6518 instr[9,5] = Vsrc
6519 instr[4,0] = Vdest. */
6520
6521 unsigned vn = INSTR (9, 5);
6522 unsigned vd = INSTR (4, 0);
6523 unsigned full = INSTR (30, 30);
6524 int i;
6525
6526 NYI_assert (29, 23, 0x5D);
6527 NYI_assert (21, 10, 0x87E);
6528
6529 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6530 if (INSTR (22, 22))
6531 {
6532 if (! full)
6533 HALT_UNALLOC;
6534
6535 for (i = 0; i < 2; i++)
6536 aarch64_set_vec_double (cpu, vd, i,
6537 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6538 }
6539 else
6540 {
6541 for (i = 0; i < (full ? 4 : 2); i++)
6542 aarch64_set_vec_float (cpu, vd, i,
6543 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6544 }
6545 }
6546
6547 static void
6548 do_vec_FNEG (sim_cpu *cpu)
6549 {
6550 /* instr[31] = 0
6551 instr[30] = half (0)/full (1)
6552 instr[29,23] = 10 1110 1
6553 instr[22] = single (0)/double (1)
6554 instr[21,10] = 10 0000 1111 10
6555 instr[9,5] = Vsrc
6556 instr[4,0] = Vdest. */
6557
6558 unsigned vn = INSTR (9, 5);
6559 unsigned vd = INSTR (4, 0);
6560 unsigned full = INSTR (30, 30);
6561 int i;
6562
6563 NYI_assert (29, 23, 0x5D);
6564 NYI_assert (21, 10, 0x83E);
6565
6566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6567 if (INSTR (22, 22))
6568 {
6569 if (! full)
6570 HALT_UNALLOC;
6571
6572 for (i = 0; i < 2; i++)
6573 aarch64_set_vec_double (cpu, vd, i,
6574 - aarch64_get_vec_double (cpu, vn, i));
6575 }
6576 else
6577 {
6578 for (i = 0; i < (full ? 4 : 2); i++)
6579 aarch64_set_vec_float (cpu, vd, i,
6580 - aarch64_get_vec_float (cpu, vn, i));
6581 }
6582 }
6583
6584 static void
6585 do_vec_NOT (sim_cpu *cpu)
6586 {
6587 /* instr[31] = 0
6588 instr[30] = half (0)/full (1)
6589 instr[29,10] = 10 1110 0010 0000 0101 10
6590 instr[9,5] = Vn
6591 instr[4,0] = Vd. */
6592
6593 unsigned vn = INSTR (9, 5);
6594 unsigned vd = INSTR (4, 0);
6595 unsigned i;
6596 int full = INSTR (30, 30);
6597
6598 NYI_assert (29, 10, 0xB8816);
6599
6600 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6601 for (i = 0; i < (full ? 16 : 8); i++)
6602 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6603 }
6604
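/* Count the leading zero bits in the low SIZE bits of VAL, e.g.
   clz (0x1, 8) == 7 and clz (0x80, 8) == 0.  If VAL is zero the
   mask is exhausted and SIZE is returned.  */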
6605 static unsigned int
6606 clz (uint64_t val, unsigned size)
6607 {
6608 uint64_t mask = 1;
6609 int count;
6610
6611 mask <<= (size - 1);
6612 count = 0;
6613 do
6614 {
6615 if (val & mask)
6616 break;
6617 mask >>= 1;
6618 count ++;
6619 }
6620 while (mask);
6621
6622 return count;
6623 }
6624
6625 static void
6626 do_vec_CLZ (sim_cpu *cpu)
6627 {
6628 /* instr[31] = 0
6629 instr[30] = half (0)/full (1)
6630 instr[29,24] = 10 1110
6631 instr[23,22] = size
6632 instr[21,10] = 10 0000 0100 10
6633 instr[9,5] = Vn
6634 instr[4,0] = Vd. */
6635
6636 unsigned vn = INSTR (9, 5);
6637 unsigned vd = INSTR (4, 0);
6638 unsigned i;
6639 int full = INSTR (30,30);
6640
6641 NYI_assert (29, 24, 0x2E);
6642 NYI_assert (21, 10, 0x812);
6643
6644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6645 switch (INSTR (23, 22))
6646 {
6647 case 0:
6648 for (i = 0; i < (full ? 16 : 8); i++)
6649 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6650 break;
6651 case 1:
6652 for (i = 0; i < (full ? 8 : 4); i++)
6653 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6654 break;
6655 case 2:
6656 for (i = 0; i < (full ? 4 : 2); i++)
6657 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6658 break;
6659 case 3:
6660 if (! full)
6661 HALT_UNALLOC;
6662 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6663 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6664 break;
6665 }
6666 }
6667
6668 static void
6669 do_vec_MOV_element (sim_cpu *cpu)
6670 {
6671 /* instr[31,21] = 0110 1110 000
6672 instr[20,16] = size & dest index
6673 instr[15] = 0
6674 instr[14,11] = source index
6675 instr[10] = 1
6676 instr[9,5] = Vs
6677 instr[4,0] = Vd. */
6678
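/* The lowest set bit of instr[20,16] selects the element size:
   xxxx1 ==> byte, xxx10 ==> half, xx100 ==> word, x1000 ==> double.  */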
6679 unsigned vs = INSTR (9, 5);
6680 unsigned vd = INSTR (4, 0);
6681 unsigned src_index;
6682 unsigned dst_index;
6683
6684 NYI_assert (31, 21, 0x370);
6685 NYI_assert (15, 15, 0);
6686 NYI_assert (10, 10, 1);
6687
6688 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6689 if (INSTR (16, 16))
6690 {
6691 /* Move a byte. */
6692 src_index = INSTR (14, 11);
6693 dst_index = INSTR (20, 17);
6694 aarch64_set_vec_u8 (cpu, vd, dst_index,
6695 aarch64_get_vec_u8 (cpu, vs, src_index));
6696 }
6697 else if (INSTR (17, 17))
6698 {
6699 /* Move 16-bits. */
6700 NYI_assert (11, 11, 0);
6701 src_index = INSTR (14, 12);
6702 dst_index = INSTR (20, 18);
6703 aarch64_set_vec_u16 (cpu, vd, dst_index,
6704 aarch64_get_vec_u16 (cpu, vs, src_index));
6705 }
6706 else if (INSTR (18, 18))
6707 {
6708 /* Move 32-bits. */
6709 NYI_assert (12, 11, 0);
6710 src_index = INSTR (14, 13);
6711 dst_index = INSTR (20, 19);
6712 aarch64_set_vec_u32 (cpu, vd, dst_index,
6713 aarch64_get_vec_u32 (cpu, vs, src_index));
6714 }
6715 else
6716 {
6717 NYI_assert (19, 19, 1);
6718 NYI_assert (13, 11, 0);
6719 src_index = INSTR (14, 14);
6720 dst_index = INSTR (20, 20);
6721 aarch64_set_vec_u64 (cpu, vd, dst_index,
6722 aarch64_get_vec_u64 (cpu, vs, src_index));
6723 }
6724 }
6725
6726 static void
6727 do_vec_REV32 (sim_cpu *cpu)
6728 {
6729 /* instr[31] = 0
6730 instr[30] = full/half
6731 instr[29,24] = 10 1110
6732 instr[23,22] = size
6733 instr[21,10] = 10 0000 0000 10
6734 instr[9,5] = Rn
6735 instr[4,0] = Rd. */
6736
6737 unsigned rn = INSTR (9, 5);
6738 unsigned rd = INSTR (4, 0);
6739 unsigned size = INSTR (23, 22);
6740 unsigned full = INSTR (30, 30);
6741 unsigned i;
6742 FRegister val;
6743
6744 NYI_assert (29, 24, 0x2E);
6745 NYI_assert (21, 10, 0x802);
6746
6747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6748 switch (size)
6749 {
6750 case 0:
6751 for (i = 0; i < (full ? 16 : 8); i++)
6752 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6753 break;
6754
6755 case 1:
6756 for (i = 0; i < (full ? 8 : 4); i++)
6757 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6758 break;
6759
6760 default:
6761 HALT_UNALLOC;
6762 }
6763
6764 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6765 if (full)
6766 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6767 }
6768
6769 static void
6770 do_vec_EXT (sim_cpu *cpu)
6771 {
6772 /* instr[31] = 0
6773 instr[30] = full/half
6774 instr[29,21] = 10 1110 000
6775 instr[20,16] = Vm
6776 instr[15] = 0
6777 instr[14,11] = source index
6778 instr[10] = 0
6779 instr[9,5] = Vn
6780 instr[4,0] = Vd. */
6781
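/* For example, EXT Vd.16B, Vn.16B, Vm.16B, #3 places bytes 3..15 of
   Vn in Vd[0..12], followed by bytes 0..2 of Vm in Vd[13..15].  */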
6782 unsigned vm = INSTR (20, 16);
6783 unsigned vn = INSTR (9, 5);
6784 unsigned vd = INSTR (4, 0);
6785 unsigned src_index = INSTR (14, 11);
6786 unsigned full = INSTR (30, 30);
6787 unsigned i;
6788 unsigned j;
6789 FRegister val;
6790
6791 NYI_assert (31, 21, 0x370);
6792 NYI_assert (15, 15, 0);
6793 NYI_assert (10, 10, 0);
6794
6795 if (!full && (src_index & 0x8))
6796 HALT_UNALLOC;
6797
6798 j = 0;
6799
6800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6801 for (i = src_index; i < (full ? 16 : 8); i++)
6802 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6803 for (i = 0; i < src_index; i++)
6804 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6805
6806 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6807 if (full)
6808 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6809 }
6810
6811 static void
6812 dexAdvSIMD0 (sim_cpu *cpu)
6813 {
6814 /* instr [28,25] = 0 111. */
6815 if (INSTR (15, 10) == 0x07
6816 && INSTR (9, 5) == INSTR (20, 16))
6818 {
6819 if (INSTR (31, 21) == 0x075
6820 || INSTR (31, 21) == 0x275)
6821 {
6822 do_vec_MOV_whole_vector (cpu);
6823 return;
6824 }
6825 }
6826
6827 if (INSTR (29, 19) == 0x1E0)
6828 {
6829 do_vec_MOV_immediate (cpu);
6830 return;
6831 }
6832
6833 if (INSTR (29, 19) == 0x5E0)
6834 {
6835 do_vec_MVNI (cpu);
6836 return;
6837 }
6838
6839 if (INSTR (29, 19) == 0x1C0
6840 || INSTR (29, 19) == 0x1C1)
6841 {
6842 if (INSTR (15, 10) == 0x03)
6843 {
6844 do_vec_DUP_scalar_into_vector (cpu);
6845 return;
6846 }
6847 }
6848
6849 switch (INSTR (29, 24))
6850 {
6851 case 0x0E: do_vec_op1 (cpu); return;
6852 case 0x0F: do_vec_op2 (cpu); return;
6853
6854 case 0x2E:
6855 if (INSTR (21, 21) == 1)
6856 {
6857 switch (INSTR (15, 10))
6858 {
6859 case 0x02:
6860 do_vec_REV32 (cpu);
6861 return;
6862
6863 case 0x07:
6864 switch (INSTR (23, 22))
6865 {
6866 case 0: do_vec_EOR (cpu); return;
6867 case 1: do_vec_BSL (cpu); return;
6868 case 2:
6869 case 3: do_vec_bit (cpu); return;
6870 }
6871 break;
6872
6873 case 0x08: do_vec_sub_long (cpu); return;
6874 case 0x11: do_vec_USHL (cpu); return;
6875 case 0x12: do_vec_CLZ (cpu); return;
6876 case 0x16: do_vec_NOT (cpu); return;
6877 case 0x19: do_vec_max (cpu); return;
6878 case 0x1B: do_vec_min (cpu); return;
6879 case 0x21: do_vec_SUB (cpu); return;
6880 case 0x25: do_vec_MLS (cpu); return;
6881 case 0x31: do_vec_FminmaxNMP (cpu); return;
6882 case 0x35: do_vec_FADDP (cpu); return;
6883 case 0x37: do_vec_FMUL (cpu); return;
6884 case 0x3F: do_vec_FDIV (cpu); return;
6885
6886 case 0x3E:
6887 switch (INSTR (20, 16))
6888 {
6889 case 0x00: do_vec_FNEG (cpu); return;
6890 case 0x01: do_vec_FSQRT (cpu); return;
6891 default: HALT_NYI;
6892 }
6893
6894 case 0x0D:
6895 case 0x0F:
6896 case 0x22:
6897 case 0x23:
6898 case 0x26:
6899 case 0x2A:
6900 case 0x32:
6901 case 0x36:
6902 case 0x39:
6903 case 0x3A:
6904 do_vec_compare (cpu); return;
6905
6906 default:
6907 break;
6908 }
6909 }
6910
6911 if (INSTR (31, 21) == 0x370)
6912 {
6913 if (INSTR (10, 10))
6914 do_vec_MOV_element (cpu);
6915 else
6916 do_vec_EXT (cpu);
6917 return;
6918 }
6919
6920 switch (INSTR (21, 10))
6921 {
6922 case 0x82E: do_vec_neg (cpu); return;
6923 case 0x87E: do_vec_sqrt (cpu); return;
6924 default:
6925 if (INSTR (15, 10) == 0x30)
6926 {
6927 do_vec_mull (cpu);
6928 return;
6929 }
6930 break;
6931 }
6932 break;
6933
6934 case 0x2F:
6935 switch (INSTR (15, 10))
6936 {
6937 case 0x01: do_vec_SSHR_USHR (cpu); return;
6938 case 0x10:
6939 case 0x12: do_vec_mls_indexed (cpu); return;
6940 case 0x29: do_vec_xtl (cpu); return;
6941 default:
6942 HALT_NYI;
6943 }
6944
6945 default:
6946 break;
6947 }
6948
6949 HALT_NYI;
6950 }
6951
6952 /* 3 sources. */
6953
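/* Note: the FMADD family below is modelled as a separate host multiply
   followed by an add, so results can be double-rounded; a host compiler
   may contract the expression, but the C99 fma ()/fmaf () routines would
   be needed to guarantee fused semantics.  */
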
6954 /* Float multiply add. */
6955 static void
6956 fmadds (sim_cpu *cpu)
6957 {
6958 unsigned sa = INSTR (14, 10);
6959 unsigned sm = INSTR (20, 16);
6960 unsigned sn = INSTR ( 9, 5);
6961 unsigned sd = INSTR ( 4, 0);
6962
6963 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6964 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6965 + aarch64_get_FP_float (cpu, sn)
6966 * aarch64_get_FP_float (cpu, sm));
6967 }
6968
6969 /* Double multiply add. */
6970 static void
6971 fmaddd (sim_cpu *cpu)
6972 {
6973 unsigned sa = INSTR (14, 10);
6974 unsigned sm = INSTR (20, 16);
6975 unsigned sn = INSTR ( 9, 5);
6976 unsigned sd = INSTR ( 4, 0);
6977
6978 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6979 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6980 + aarch64_get_FP_double (cpu, sn)
6981 * aarch64_get_FP_double (cpu, sm));
6982 }
6983
6984 /* Float multiply subtract. */
6985 static void
6986 fmsubs (sim_cpu *cpu)
6987 {
6988 unsigned sa = INSTR (14, 10);
6989 unsigned sm = INSTR (20, 16);
6990 unsigned sn = INSTR ( 9, 5);
6991 unsigned sd = INSTR ( 4, 0);
6992
6993 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6994 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6995 - aarch64_get_FP_float (cpu, sn)
6996 * aarch64_get_FP_float (cpu, sm));
6997 }
6998
6999 /* Double multiply subtract. */
7000 static void
7001 fmsubd (sim_cpu *cpu)
7002 {
7003 unsigned sa = INSTR (14, 10);
7004 unsigned sm = INSTR (20, 16);
7005 unsigned sn = INSTR ( 9, 5);
7006 unsigned sd = INSTR ( 4, 0);
7007
7008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7009 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7010 - aarch64_get_FP_double (cpu, sn)
7011 * aarch64_get_FP_double (cpu, sm));
7012 }
7013
7014 /* Float negative multiply add. */
7015 static void
7016 fnmadds (sim_cpu *cpu)
7017 {
7018 unsigned sa = INSTR (14, 10);
7019 unsigned sm = INSTR (20, 16);
7020 unsigned sn = INSTR ( 9, 5);
7021 unsigned sd = INSTR ( 4, 0);
7022
7023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7024 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7025 + (- aarch64_get_FP_float (cpu, sn))
7026 * aarch64_get_FP_float (cpu, sm));
7027 }
7028
7029 /* Double negative multiply add. */
7030 static void
7031 fnmaddd (sim_cpu *cpu)
7032 {
7033 unsigned sa = INSTR (14, 10);
7034 unsigned sm = INSTR (20, 16);
7035 unsigned sn = INSTR ( 9, 5);
7036 unsigned sd = INSTR ( 4, 0);
7037
7038 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7039 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7040 + (- aarch64_get_FP_double (cpu, sn))
7041 * aarch64_get_FP_double (cpu, sm));
7042 }
7043
7044 /* Float negative multiply subtract. */
7045 static void
7046 fnmsubs (sim_cpu *cpu)
7047 {
7048 unsigned sa = INSTR (14, 10);
7049 unsigned sm = INSTR (20, 16);
7050 unsigned sn = INSTR ( 9, 5);
7051 unsigned sd = INSTR ( 4, 0);
7052
7053 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7054 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7055 + aarch64_get_FP_float (cpu, sn)
7056 * aarch64_get_FP_float (cpu, sm));
7057 }
7058
7059 /* Double negative multiply subtract. */
7060 static void
7061 fnmsubd (sim_cpu *cpu)
7062 {
7063 unsigned sa = INSTR (14, 10);
7064 unsigned sm = INSTR (20, 16);
7065 unsigned sn = INSTR ( 9, 5);
7066 unsigned sd = INSTR ( 4, 0);
7067
7068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7069 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7070 + aarch64_get_FP_double (cpu, sn)
7071 * aarch64_get_FP_double (cpu, sm));
7072 }
7073
7074 static void
7075 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7076 {
7077 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7078 instr[30] = 0
7079 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7080 instr[28,25] = 1111
7081 instr[24] = 1
7082 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7083 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7084 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
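The combined type:o1:o2 dispatch below therefore selects, in order,
FMADD, FMSUB, FNMADD, FNMSUB on singles (0..3), then the same four
operations on doubles (4..7).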
7085
7086 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7087 /* dispatch on combined type:o1:o2. */
7088 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7089
7090 if (M_S != 0)
7091 HALT_UNALLOC;
7092
7093 switch (dispatch)
7094 {
7095 case 0: fmadds (cpu); return;
7096 case 1: fmsubs (cpu); return;
7097 case 2: fnmadds (cpu); return;
7098 case 3: fnmsubs (cpu); return;
7099 case 4: fmaddd (cpu); return;
7100 case 5: fmsubd (cpu); return;
7101 case 6: fnmaddd (cpu); return;
7102 case 7: fnmsubd (cpu); return;
7103 default:
7104 /* type > 1 is currently unallocated. */
7105 HALT_UNALLOC;
7106 }
7107 }
7108
7109 static void
7110 dexSimpleFPFixedConvert (sim_cpu *cpu)
7111 {
7112 HALT_NYI;
7113 }
7114
7115 static void
7116 dexSimpleFPCondCompare (sim_cpu *cpu)
7117 {
7118 /* instr [31,23] = 0001 1110 0
7119 instr [22] = type
7120 instr [21] = 1
7121 instr [20,16] = Rm
7122 instr [15,12] = condition
7123 instr [11,10] = 01
7124 instr [9,5] = Rn
7125 instr [4] = 0
7126 instr [3,0] = nzcv */
7127
7128 unsigned rm = INSTR (20, 16);
7129 unsigned rn = INSTR (9, 5);
7130
7131 NYI_assert (31, 23, 0x3C);
7132 NYI_assert (11, 10, 0x1);
7133 NYI_assert (4, 4, 0);
7134
7135 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7136 if (! testConditionCode (cpu, INSTR (15, 12)))
7137 {
7138 aarch64_set_CPSR (cpu, INSTR (3, 0));
7139 return;
7140 }
7141
7142 if (INSTR (22, 22))
7143 {
7144 /* Double precision. */
7145 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7146 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7147
7148 /* FIXME: Check for NaNs. */
7149 if (val1 == val2)
7150 aarch64_set_CPSR (cpu, (Z | C));
7151 else if (val1 < val2)
7152 aarch64_set_CPSR (cpu, N);
7153 else /* val1 > val2 */
7154 aarch64_set_CPSR (cpu, C);
7155 }
7156 else
7157 {
7158 /* Single precision. */
7159 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7160 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7161
7162 /* FIXME: Check for NaNs. */
7163 if (val1 == val2)
7164 aarch64_set_CPSR (cpu, (Z | C));
7165 else if (val1 < val2)
7166 aarch64_set_CPSR (cpu, N);
7167 else /* val1 > val2 */
7168 aarch64_set_CPSR (cpu, C);
7169 }
7170 }
7171
7172 /* 2 sources. */
7173
7174 /* Float add. */
7175 static void
7176 fadds (sim_cpu *cpu)
7177 {
7178 unsigned sm = INSTR (20, 16);
7179 unsigned sn = INSTR ( 9, 5);
7180 unsigned sd = INSTR ( 4, 0);
7181
7182 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7183 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7184 + aarch64_get_FP_float (cpu, sm));
7185 }
7186
7187 /* Double add. */
7188 static void
7189 faddd (sim_cpu *cpu)
7190 {
7191 unsigned sm = INSTR (20, 16);
7192 unsigned sn = INSTR ( 9, 5);
7193 unsigned sd = INSTR ( 4, 0);
7194
7195 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7196 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7197 + aarch64_get_FP_double (cpu, sm));
7198 }
7199
7200 /* Float divide. */
7201 static void
7202 fdivs (sim_cpu *cpu)
7203 {
7204 unsigned sm = INSTR (20, 16);
7205 unsigned sn = INSTR ( 9, 5);
7206 unsigned sd = INSTR ( 4, 0);
7207
7208 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7209 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7210 / aarch64_get_FP_float (cpu, sm));
7211 }
7212
7213 /* Double divide. */
7214 static void
7215 fdivd (sim_cpu *cpu)
7216 {
7217 unsigned sm = INSTR (20, 16);
7218 unsigned sn = INSTR ( 9, 5);
7219 unsigned sd = INSTR ( 4, 0);
7220
7221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7222 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7223 / aarch64_get_FP_double (cpu, sm));
7224 }
7225
7226 /* Float multiply. */
7227 static void
7228 fmuls (sim_cpu *cpu)
7229 {
7230 unsigned sm = INSTR (20, 16);
7231 unsigned sn = INSTR ( 9, 5);
7232 unsigned sd = INSTR ( 4, 0);
7233
7234 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7235 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7236 * aarch64_get_FP_float (cpu, sm));
7237 }
7238
7239 /* Double multiply. */
7240 static void
7241 fmuld (sim_cpu *cpu)
7242 {
7243 unsigned sm = INSTR (20, 16);
7244 unsigned sn = INSTR ( 9, 5);
7245 unsigned sd = INSTR ( 4, 0);
7246
7247 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7248 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7249 * aarch64_get_FP_double (cpu, sm));
7250 }
7251
7252 /* Float negate and multiply. */
7253 static void
7254 fnmuls (sim_cpu *cpu)
7255 {
7256 unsigned sm = INSTR (20, 16);
7257 unsigned sn = INSTR ( 9, 5);
7258 unsigned sd = INSTR ( 4, 0);
7259
7260 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7261 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7262 * aarch64_get_FP_float (cpu, sm)));
7263 }
7264
7265 /* Double negate and multiply. */
7266 static void
7267 fnmuld (sim_cpu *cpu)
7268 {
7269 unsigned sm = INSTR (20, 16);
7270 unsigned sn = INSTR ( 9, 5);
7271 unsigned sd = INSTR ( 4, 0);
7272
7273 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7274 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7275 * aarch64_get_FP_double (cpu, sm)));
7276 }
7277
7278 /* Float subtract. */
7279 static void
7280 fsubs (sim_cpu *cpu)
7281 {
7282 unsigned sm = INSTR (20, 16);
7283 unsigned sn = INSTR ( 9, 5);
7284 unsigned sd = INSTR ( 4, 0);
7285
7286 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7287 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7288 - aarch64_get_FP_float (cpu, sm));
7289 }
7290
7291 /* Double subtract. */
7292 static void
7293 fsubd (sim_cpu *cpu)
7294 {
7295 unsigned sm = INSTR (20, 16);
7296 unsigned sn = INSTR ( 9, 5);
7297 unsigned sd = INSTR ( 4, 0);
7298
7299 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7300 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7301 - aarch64_get_FP_double (cpu, sm));
7302 }
7303
7304 static void
7305 do_FMINNM (sim_cpu *cpu)
7306 {
7307 /* instr[31,23] = 0 0011 1100
7308 instr[22] = float(0)/double(1)
7309 instr[21] = 1
7310 instr[20,16] = Sm
7311 instr[15,10] = 01 1110
7312 instr[9,5] = Sn
7313 instr[4,0] = Sd */
7314
7315 unsigned sm = INSTR (20, 16);
7316 unsigned sn = INSTR ( 9, 5);
7317 unsigned sd = INSTR ( 4, 0);
7318
7319 NYI_assert (31, 23, 0x03C);
7320 NYI_assert (15, 10, 0x1E);
7321
7322 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7323 if (INSTR (22, 22))
7324 aarch64_set_FP_double (cpu, sd,
7325 dminnm (aarch64_get_FP_double (cpu, sn),
7326 aarch64_get_FP_double (cpu, sm)));
7327 else
7328 aarch64_set_FP_float (cpu, sd,
7329 fminnm (aarch64_get_FP_float (cpu, sn),
7330 aarch64_get_FP_float (cpu, sm)));
7331 }
7332
7333 static void
7334 do_FMAXNM (sim_cpu *cpu)
7335 {
7336 /* instr[31,23] = 0 0011 1100
7337 instr[22] = float(0)/double(1)
7338 instr[21] = 1
7339 instr[20,16] = Sm
7340 instr[15,10] = 01 1010
7341 instr[9,5] = Sn
7342 instr[4,0] = Sd */
7343
7344 unsigned sm = INSTR (20, 16);
7345 unsigned sn = INSTR ( 9, 5);
7346 unsigned sd = INSTR ( 4, 0);
7347
7348 NYI_assert (31, 23, 0x03C);
7349 NYI_assert (15, 10, 0x1A);
7350
7351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7352 if (INSTR (22, 22))
7353 aarch64_set_FP_double (cpu, sd,
7354 dmaxnm (aarch64_get_FP_double (cpu, sn),
7355 aarch64_get_FP_double (cpu, sm)));
7356 else
7357 aarch64_set_FP_float (cpu, sd,
7358 fmaxnm (aarch64_get_FP_float (cpu, sn),
7359 aarch64_get_FP_float (cpu, sm)));
7360 }
7361
7362 static void
7363 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7364 {
7365 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7366 instr[30] = 0
7367 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7368 instr[28,25] = 1111
7369 instr[24] = 0
7370 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7371 instr[21] = 1
7372 instr[20,16] = Vm
7373 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7374 0010 ==> FADD, 0011 ==> FSUB,
7375 0100 ==> FMAX, 0101 ==> FMIN
7376 0110 ==> FMAXNM, 0111 ==> FMINNM
7377 1000 ==> FNMUL, ow ==> UNALLOC
7378 instr[11,10] = 10
7379 instr[9,5] = Vn
7380 instr[4,0] = Vd */
7381
7382 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7383 uint32_t type = INSTR (23, 22);
7384 /* Dispatch on opcode. */
7385 uint32_t dispatch = INSTR (15, 12);
7386
7387 if (type > 1)
7388 HALT_UNALLOC;
7389
7390 if (M_S != 0)
7391 HALT_UNALLOC;
7392
7393 if (type)
7394 switch (dispatch)
7395 {
7396 case 0: fmuld (cpu); return;
7397 case 1: fdivd (cpu); return;
7398 case 2: faddd (cpu); return;
7399 case 3: fsubd (cpu); return;
7400 case 6: do_FMAXNM (cpu); return;
7401 case 7: do_FMINNM (cpu); return;
7402 case 8: fnmuld (cpu); return;
7403
7404 /* Have not yet implemented fmax and fmin. */
7405 case 4:
7406 case 5:
7407 HALT_NYI;
7408
7409 default:
7410 HALT_UNALLOC;
7411 }
7412 else /* type == 0 => floats. */
7413 switch (dispatch)
7414 {
7415 case 0: fmuls (cpu); return;
7416 case 1: fdivs (cpu); return;
7417 case 2: fadds (cpu); return;
7418 case 3: fsubs (cpu); return;
7419 case 6: do_FMAXNM (cpu); return;
7420 case 7: do_FMINNM (cpu); return;
7421 case 8: fnmuls (cpu); return;
7422
7423 case 4:
7424 case 5:
7425 HALT_NYI;
7426
7427 default:
7428 HALT_UNALLOC;
7429 }
7430 }
7431
7432 static void
7433 dexSimpleFPCondSelect (sim_cpu *cpu)
7434 {
7435 /* FCSEL
7436 instr[31,23] = 0 0011 1100
7437 instr[22] = 0=>single 1=>double
7438 instr[21] = 1
7439 instr[20,16] = Sm
7440 instr[15,12] = cond
7441 instr[11,10] = 11
7442 instr[9,5] = Sn
7443 instr[4,0] = Sd */
7444 unsigned sm = INSTR (20, 16);
7445 unsigned sn = INSTR ( 9, 5);
7446 unsigned sd = INSTR ( 4, 0);
7447 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7448
7449 NYI_assert (31, 23, 0x03C);
7450 NYI_assert (11, 10, 0x3);
7451
7452 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7453 if (INSTR (22, 22))
7454 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7455 : aarch64_get_FP_double (cpu, sm)));
7456 else
7457 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7458 : aarch64_get_FP_float (cpu, sm)));
7459 }
7460
7461 /* Store 32 bit unscaled signed 9 bit. */
7462 static void
7463 fsturs (sim_cpu *cpu, int32_t offset)
7464 {
7465 unsigned int rn = INSTR (9, 5);
7466 unsigned int st = INSTR (4, 0);
7467
7468 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7469 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7470 aarch64_get_vec_u32 (cpu, st, 0));
7471 }
7472
7473 /* Store 64 bit unscaled signed 9 bit. */
7474 static void
7475 fsturd (sim_cpu *cpu, int32_t offset)
7476 {
7477 unsigned int rn = INSTR (9, 5);
7478 unsigned int st = INSTR (4, 0);
7479
7480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7481 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7482 aarch64_get_vec_u64 (cpu, st, 0));
7483 }
7484
7485 /* Store 128 bit unscaled signed 9 bit. */
7486 static void
7487 fsturq (sim_cpu *cpu, int32_t offset)
7488 {
7489 unsigned int rn = INSTR (9, 5);
7490 unsigned int st = INSTR (4, 0);
7491 FRegister a;
7492
7493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7494 aarch64_get_FP_long_double (cpu, st, & a);
7495 aarch64_set_mem_long_double (cpu,
7496 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7497 + offset, a);
7498 }
7499
7500 /* TODO FP move register. */
7501
7502 /* 32 bit fp to fp move register. */
7503 static void
7504 ffmovs (sim_cpu *cpu)
7505 {
7506 unsigned int rn = INSTR (9, 5);
7507 unsigned int st = INSTR (4, 0);
7508
7509 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7510 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7511 }
7512
7513 /* 64 bit fp to fp move register. */
7514 static void
7515 ffmovd (sim_cpu *cpu)
7516 {
7517 unsigned int rn = INSTR (9, 5);
7518 unsigned int st = INSTR (4, 0);
7519
7520 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7521 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7522 }
7523
7524 /* 32 bit GReg to Vec move register. */
7525 static void
7526 fgmovs (sim_cpu *cpu)
7527 {
7528 unsigned int rn = INSTR (9, 5);
7529 unsigned int st = INSTR (4, 0);
7530
7531 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7532 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7533 }
7534
7535 /* 64 bit g to fp move register. */
7536 static void
7537 fgmovd (sim_cpu *cpu)
7538 {
7539 unsigned int rn = INSTR (9, 5);
7540 unsigned int st = INSTR (4, 0);
7541
7542 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7543 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7544 }
7545
7546 /* 32 bit fp to g move register. */
7547 static void
7548 gfmovs (sim_cpu *cpu)
7549 {
7550 unsigned int rn = INSTR (9, 5);
7551 unsigned int st = INSTR (4, 0);
7552
7553 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7554 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7555 }
7556
7557 /* 64 bit fp to g move register. */
7558 static void
7559 gfmovd (sim_cpu *cpu)
7560 {
7561 unsigned int rn = INSTR (9, 5);
7562 unsigned int st = INSTR (4, 0);
7563
7564 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7565 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7566 }
7567
7568 /* FP move immediate
7569
7570 These install an immediate 8 bit value in the target register
7571 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7572 bit exponent. */
7573
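/* Purely illustrative sketch (never called): how an 8 bit encoding of
   this shape expands to a single precision value under the AArch64
   VFPExpandImm scheme.  The simulator's real conversions go through
   fp_immediate_for_encoding_32/64, used by fmovs/fmovd below.  */

static inline float
fp_imm8_expand_example (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 1;
  uint32_t b    = (imm8 >> 6) & 1;    /* Top exponent bit, stored inverted.  */
  uint32_t cd   = (imm8 >> 4) & 3;    /* Low two exponent bits.  */
  uint32_t frac = imm8 & 0xF;         /* Four fraction bits.  */
  /* exponent = NOT(b) : b b b b b : cd, fraction = frac : 19 zeros.  */
  uint32_t bits = (sign << 31)
    | ((b ^ 1) << 30)
    | ((b ? 0x1F : 0) << 25)
    | (cd << 23)
    | (frac << 19);
  float result;

  memcpy (&result, &bits, sizeof result);  /* E.g. imm8 == 0x70 ==> 1.0f.  */
  return result;
}
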
7574 static void
7575 fmovs (sim_cpu *cpu)
7576 {
7577 unsigned int sd = INSTR (4, 0);
7578 uint32_t imm = INSTR (20, 13);
7579 float f = fp_immediate_for_encoding_32 (imm);
7580
7581 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7582 aarch64_set_FP_float (cpu, sd, f);
7583 }
7584
7585 static void
7586 fmovd (sim_cpu *cpu)
7587 {
7588 unsigned int sd = INSTR (4, 0);
7589 uint32_t imm = INSTR (20, 13);
7590 double d = fp_immediate_for_encoding_64 (imm);
7591
7592 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7593 aarch64_set_FP_double (cpu, sd, d);
7594 }
7595
7596 static void
7597 dexSimpleFPImmediate (sim_cpu *cpu)
7598 {
7599 /* instr[31,23] == 0 0011 1100
7600 instr[22] == type : single(0)/double(1)
7601 instr[21] == 1
7602 instr[20,13] == imm8
7603 instr[12,10] == 100
7604 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7605 instr[4,0] == Rd */
7606 uint32_t imm5 = INSTR (9, 5);
7607
7608 NYI_assert (31, 23, 0x3C);
7609
7610 if (imm5 != 0)
7611 HALT_UNALLOC;
7612
7613 if (INSTR (22, 22))
7614 fmovd (cpu);
7615 else
7616 fmovs (cpu);
7617 }
7618
7619 /* TODO specific decode and execute for group Load Store. */
7620
7621 /* TODO FP load/store single register (unscaled offset). */
7622
7623 /* TODO load 8 bit unscaled signed 9 bit. */
7624 /* TODO load 16 bit unscaled signed 9 bit. */
7625
7626 /* Load 32 bit unscaled signed 9 bit. */
7627 static void
7628 fldurs (sim_cpu *cpu, int32_t offset)
7629 {
7630 unsigned int rn = INSTR (9, 5);
7631 unsigned int st = INSTR (4, 0);
7632
7633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7634 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7635 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7636 }
7637
7638 /* Load 64 bit unscaled signed 9 bit. */
7639 static void
7640 fldurd (sim_cpu *cpu, int32_t offset)
7641 {
7642 unsigned int rn = INSTR (9, 5);
7643 unsigned int st = INSTR (4, 0);
7644
7645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7646 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7647 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7648 }
7649
7650 /* Load 128 bit unscaled signed 9 bit. */
7651 static void
7652 fldurq (sim_cpu *cpu, int32_t offset)
7653 {
7654 unsigned int rn = INSTR (9, 5);
7655 unsigned int st = INSTR (4, 0);
7656 FRegister a;
7657 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7658
7659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7660 aarch64_get_mem_long_double (cpu, addr, & a);
7661 aarch64_set_FP_long_double (cpu, st, a);
7662 }
7663
7664 /* TODO store 8 bit unscaled signed 9 bit. */
7665 /* TODO store 16 bit unscaled signed 9 bit. */
7666
7667
7668 /* 1 source. */
7669
7670 /* Float absolute value. */
7671 static void
7672 fabss (sim_cpu *cpu)
7673 {
7674 unsigned sn = INSTR (9, 5);
7675 unsigned sd = INSTR (4, 0);
7676 float value = aarch64_get_FP_float (cpu, sn);
7677
7678 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7679 aarch64_set_FP_float (cpu, sd, fabsf (value));
7680 }
7681
7682 /* Double absolute value. */
7683 static void
7684 fabcpu (sim_cpu *cpu)
7685 {
7686 unsigned sn = INSTR (9, 5);
7687 unsigned sd = INSTR (4, 0);
7688 double value = aarch64_get_FP_double (cpu, sn);
7689
7690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7691 aarch64_set_FP_double (cpu, sd, fabs (value));
7692 }
7693
7694 /* Float negative value. */
7695 static void
7696 fnegs (sim_cpu *cpu)
7697 {
7698 unsigned sn = INSTR (9, 5);
7699 unsigned sd = INSTR (4, 0);
7700
7701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7702 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7703 }
7704
7705 /* Double negative value. */
7706 static void
7707 fnegd (sim_cpu *cpu)
7708 {
7709 unsigned sn = INSTR (9, 5);
7710 unsigned sd = INSTR (4, 0);
7711
7712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7713 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7714 }
7715
7716 /* Float square root. */
7717 static void
7718 fsqrts (sim_cpu *cpu)
7719 {
7720 unsigned sn = INSTR (9, 5);
7721 unsigned sd = INSTR (4, 0);
7722
7723 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7724 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7725 }
7726
7727 /* Double square root. */
7728 static void
7729 fsqrtd (sim_cpu *cpu)
7730 {
7731 unsigned sn = INSTR (9, 5);
7732 unsigned sd = INSTR (4, 0);
7733
7734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7735 aarch64_set_FP_double (cpu, sd,
7736 sqrt (aarch64_get_FP_double (cpu, sn)));
7737 }
7738
7739 /* Convert double to float. */
7740 static void
7741 fcvtds (sim_cpu *cpu)
7742 {
7743 unsigned sn = INSTR (9, 5);
7744 unsigned sd = INSTR (4, 0);
7745
7746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7747 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7748 }
7749
7750 /* Convert float to double. */
7751 static void
7752 fcvtcpu (sim_cpu *cpu)
7753 {
7754 unsigned sn = INSTR (9, 5);
7755 unsigned sd = INSTR (4, 0);
7756
7757 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7758 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7759 }
7760
7761 static void
7762 do_FRINT (sim_cpu *cpu)
7763 {
7764 /* instr[31,23] = 0001 1110 0
7765 instr[22] = single(0)/double(1)
7766 instr[21,18] = 1001
7767 instr[17,15] = rounding mode
7768 instr[14,10] = 10000
7769 instr[9,5] = source
7770 instr[4,0] = dest */
7771
7772 float val;
7773 unsigned rs = INSTR (9, 5);
7774 unsigned rd = INSTR (4, 0);
7775 unsigned int rmode = INSTR (17, 15);
7776
7777 NYI_assert (31, 23, 0x03C);
7778 NYI_assert (21, 18, 0x9);
7779 NYI_assert (14, 10, 0x10);
7780
7781 if (rmode == 6 || rmode == 7)
7782 /* FIXME: Add support for rmode == 6 exactness check. */
7783 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7784
7785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7786 if (INSTR (22, 22))
7787 {
7788 double val = aarch64_get_FP_double (cpu, rs);
7789
7790 switch (rmode)
7791 {
7792 case 0: /* mode N: nearest or even. */
7793 {
7794 double rval = round (val);
7795
7796 /* round () rounds halfway cases away from zero, so
7797 steer any tie whose result is odd back to even. */
7798 if (fabs (val - rval) == 0.5
7799 && fmod (rval, 2.0) != 0.0)
7800 rval -= copysign (1.0, rval);
7801
7802 aarch64_set_FP_double (cpu, rd, rval);
7803 return;
7804 }
7805
7806 case 1: /* mode P: towards +inf. */
7807 if (val < 0.0)
7808 aarch64_set_FP_double (cpu, rd, trunc (val));
7809 else
7810 aarch64_set_FP_double (cpu, rd, round (val));
7811 return;
7812
7813 case 2: /* mode M: towards -inf. */
7814 if (val < 0.0)
7815 aarch64_set_FP_double (cpu, rd, round (val));
7816 else
7817 aarch64_set_FP_double (cpu, rd, trunc (val));
7818 return;
7819
7820 case 3: /* mode Z: towards 0. */
7821 aarch64_set_FP_double (cpu, rd, trunc (val));
7822 return;
7823
7824 case 4: /* mode A: away from 0. */
7825 aarch64_set_FP_double (cpu, rd, round (val));
7826 return;
7827
7828 case 6: /* mode X: use FPCR with exactness check. */
7829 case 7: /* mode I: use FPCR mode. */
7830 HALT_NYI;
7831
7832 default:
7833 HALT_UNALLOC;
7834 }
7835 }
7836
7837 val = aarch64_get_FP_float (cpu, rs);
7838
7839 switch (rmode)
7840 {
7841 case 0: /* mode N: nearest or even. */
7842 {
7843 float rval = roundf (val);
7844
7845 /* roundf () rounds halfway cases away from zero, so
7846 steer any tie whose result is odd back to even. */
7847 if (fabsf (val - rval) == 0.5f
7848 && fmodf (rval, 2.0f) != 0.0f)
7849 rval -= copysignf (1.0f, rval);
7850
7851 aarch64_set_FP_float (cpu, rd, rval);
7852 return;
7853 }
7854
7855 case 1: /* mode P: towards +inf. */
7856 if (val < 0.0)
7857 aarch64_set_FP_float (cpu, rd, truncf (val));
7858 else
7859 aarch64_set_FP_float (cpu, rd, roundf (val));
7860 return;
7861
7862 case 2: /* mode M: towards -inf. */
7863 if (val < 0.0)
7864 aarch64_set_FP_float (cpu, rd, truncf (val));
7865 else
7866 aarch64_set_FP_float (cpu, rd, roundf (val));
7867 return;
7868
7869 case 3: /* mode Z: towards 0. */
7870 aarch64_set_FP_float (cpu, rd, truncf (val));
7871 return;
7872
7873 case 4: /* mode A: away from 0. */
7874 aarch64_set_FP_float (cpu, rd, roundf (val));
7875 return;
7876
7877 case 6: /* mode X: use FPCR with exactness check. */
7878 case 7: /* mode I: use FPCR mode. */
7879 HALT_NYI;
7880
7881 default:
7882 HALT_UNALLOC;
7883 }
7884 }
7885
7886 /* Convert half to float. */
7887 static void
7888 do_FCVT_half_to_single (sim_cpu *cpu)
7889 {
7890 unsigned rn = INSTR (9, 5);
7891 unsigned rd = INSTR (4, 0);
7892
7893 NYI_assert (31, 10, 0x7B890);
7894
7895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7896 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7897 }
7898
7899 /* Convert half to double. */
7900 static void
7901 do_FCVT_half_to_double (sim_cpu *cpu)
7902 {
7903 unsigned rn = INSTR (9, 5);
7904 unsigned rd = INSTR (4, 0);
7905
7906 NYI_assert (31, 10, 0x7B8B0);
7907
7908 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7909 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7910 }
7911
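/* Convert single to half.  */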
7912 static void
7913 do_FCVT_single_to_half (sim_cpu *cpu)
7914 {
7915 unsigned rn = INSTR (9, 5);
7916 unsigned rd = INSTR (4, 0);
7917
7918 NYI_assert (31, 10, 0x788F0);
7919
7920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7921 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7922 }
7923
7924 /* Convert double to half. */
7925 static void
7926 do_FCVT_double_to_half (sim_cpu *cpu)
7927 {
7928 unsigned rn = INSTR (9, 5);
7929 unsigned rd = INSTR (4, 0);
7930
7931 NYI_assert (31, 10, 0x798F0);
7932
7933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7934 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7935 }
7936
7937 static void
7938 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7939 {
7940 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7941 instr[30] = 0
7942 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7943 instr[28,25] = 1111
7944 instr[24] = 0
7945 instr[23,22] ==> type : 00 ==> source is single,
7946 01 ==> source is double
7947 10 ==> UNALLOC
7948 11 ==> UNALLOC or source is half
7949 instr[21] = 1
7950 instr[20,15] ==> opcode : with type 00 or 01
7951 000000 ==> FMOV, 000001 ==> FABS,
7952 000010 ==> FNEG, 000011 ==> FSQRT,
7953 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7954 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7955 001000 ==> FRINTN, 001001 ==> FRINTP,
7956 001010 ==> FRINTM, 001011 ==> FRINTZ,
7957 001100 ==> FRINTA, 001101 ==> UNALLOC
7958 001110 ==> FRINTX, 001111 ==> FRINTI
7959 with type 11
7960 000100 ==> FCVT (half-to-single)
7961 000101 ==> FCVT (half-to-double)
7962 instr[14,10] = 10000. */
7963
7964 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7965 uint32_t type = INSTR (23, 22);
7966 uint32_t opcode = INSTR (20, 15);
7967
7968 if (M_S != 0)
7969 HALT_UNALLOC;
7970
7971 if (type == 3)
7972 {
7973 if (opcode == 4)
7974 do_FCVT_half_to_single (cpu);
7975 else if (opcode == 5)
7976 do_FCVT_half_to_double (cpu);
7977 else
7978 HALT_UNALLOC;
7979 return;
7980 }
7981
7982 if (type == 2)
7983 HALT_UNALLOC;
7984
7985 switch (opcode)
7986 {
7987 case 0:
7988 if (type)
7989 ffmovd (cpu);
7990 else
7991 ffmovs (cpu);
7992 return;
7993
7994 case 1:
7995 if (type)
7996 fabcpu (cpu);
7997 else
7998 fabss (cpu);
7999 return;
8000
8001 case 2:
8002 if (type)
8003 fnegd (cpu);
8004 else
8005 fnegs (cpu);
8006 return;
8007
8008 case 3:
8009 if (type)
8010 fsqrtd (cpu);
8011 else
8012 fsqrts (cpu);
8013 return;
8014
8015 case 4:
8016 if (type)
8017 fcvtds (cpu);
8018 else
8019 HALT_UNALLOC;
8020 return;
8021
8022 case 5:
8023 if (type)
8024 HALT_UNALLOC;
8025 fcvtcpu (cpu);
8026 return;
8027
8028 case 8: /* FRINTN etc. */
8029 case 9:
8030 case 10:
8031 case 11:
8032 case 12:
8033 case 14:
8034 case 15:
8035 do_FRINT (cpu);
8036 return;
8037
8038 case 7:
8039 if (INSTR (22, 22))
8040 do_FCVT_double_to_half (cpu);
8041 else
8042 do_FCVT_single_to_half (cpu);
8043 return;
8044
8045 case 13:
8046 HALT_NYI;
8047
8048 default:
8049 HALT_UNALLOC;
8050 }
8051 }
8052
8053 /* 32 bit signed int to float. */
8054 static void
8055 scvtf32 (sim_cpu *cpu)
8056 {
8057 unsigned rn = INSTR (9, 5);
8058 unsigned sd = INSTR (4, 0);
8059
8060 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8061 aarch64_set_FP_float
8062 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8063 }
8064
8065 /* 64 bit signed int to float. */
8066 static void
8067 scvtf (sim_cpu *cpu)
8068 {
8069 unsigned rn = INSTR (9, 5);
8070 unsigned sd = INSTR (4, 0);
8071
8072 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8073 aarch64_set_FP_float
8074 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8075 }
8076
8077 /* 32 bit signed int to double. */
8078 static void
8079 scvtd32 (sim_cpu *cpu)
8080 {
8081 unsigned rn = INSTR (9, 5);
8082 unsigned sd = INSTR (4, 0);
8083
8084 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8085 aarch64_set_FP_double
8086 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8087 }
8088
8089 /* 64 bit signed int to double. */
8090 static void
8091 scvtd (sim_cpu *cpu)
8092 {
8093 unsigned rn = INSTR (9, 5);
8094 unsigned sd = INSTR (4, 0);
8095
8096 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8097 aarch64_set_FP_double
8098 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8099 }
8100
8101 static const float FLOAT_INT_MAX = (float) INT_MAX;
8102 static const float FLOAT_INT_MIN = (float) INT_MIN;
8103 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8104 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8105 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8106 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8107 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8108 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
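/* Note: the LONG/ULONG bounds above and below assume a host where long
   is 64 bits wide; on an ILP32 or LLP64 host the 64 bit saturation
   bounds would need int64_t/uint64_t limits instead.  */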
8109
8110 #define UINT_MIN 0
8111 #define ULONG_MIN 0
8112 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8113 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8114 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8115 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8116 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8117 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8118 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8119 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8120
8121 /* Check for FP exception conditions:
8122 NaN raises IO
8123 Infinity raises IO
8124 Out of Range raises IO and IX and saturates value
8125 Denormal raises ID and IX and sets to zero. */
8126 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8127 do \
8128 { \
8129 switch (fpclassify (F)) \
8130 { \
8131 case FP_INFINITE: \
8132 case FP_NAN: \
8133 aarch64_set_FPSR (cpu, IO); \
8134 if (signbit (F)) \
8135 VALUE = ITYPE##_MIN; \
8136 else \
8137 VALUE = ITYPE##_MAX; \
8138 break; \
8139 \
8140 case FP_NORMAL: \
8141 if (F >= FTYPE##_##ITYPE##_MAX) \
8142 { \
8143 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8144 VALUE = ITYPE##_MAX; \
8145 } \
8146 else if (F <= FTYPE##_##ITYPE##_MIN) \
8147 { \
8148 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8149 VALUE = ITYPE##_MIN; \
8150 } \
8151 break; \
8152 \
8153 case FP_SUBNORMAL: \
8154 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8155 VALUE = 0; \
8156 break; \
8157 \
8158 default: \
8159 case FP_ZERO: \
8160 VALUE = 0; \
8161 break; \
8162 } \
8163 } \
8164 while (0)
8165
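/* For example, RAISE_EXCEPTIONS (f, value, FLOAT, INT) pastes the
   FTYPE/ITYPE tokens into the FLOAT_INT_MAX/FLOAT_INT_MIN bounds and
   the INT_MAX/INT_MIN saturation values used by fcvtszs32 below.  */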
8166 /* 32 bit convert float to signed int truncate towards zero. */
8167 static void
8168 fcvtszs32 (sim_cpu *cpu)
8169 {
8170 unsigned sn = INSTR (9, 5);
8171 unsigned rd = INSTR (4, 0);
8172 /* TODO : check that this rounds toward zero. */
8173 float f = aarch64_get_FP_float (cpu, sn);
8174 int32_t value = (int32_t) f;
8175
8176 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8177
8178 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8179 /* Avoid sign extension to 64 bit. */
8180 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8181 }
8182
8183 /* 64 bit convert float to signed int truncate towards zero. */
8184 static void
8185 fcvtszs (sim_cpu *cpu)
8186 {
8187 unsigned sn = INSTR (9, 5);
8188 unsigned rd = INSTR (4, 0);
8189 float f = aarch64_get_FP_float (cpu, sn);
8190 int64_t value = (int64_t) f;
8191
8192 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8193
8194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8195 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8196 }
8197
8198 /* 32 bit convert double to signed int truncate towards zero. */
8199 static void
8200 fcvtszd32 (sim_cpu *cpu)
8201 {
8202 unsigned sn = INSTR (9, 5);
8203 unsigned rd = INSTR (4, 0);
8204 /* TODO : check that this rounds toward zero. */
8205 double d = aarch64_get_FP_double (cpu, sn);
8206 int32_t value = (int32_t) d;
8207
8208 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8209
8210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8211 /* Avoid sign extension to 64 bit. */
8212 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8213 }
8214
8215 /* 64 bit convert double to signed int truncate towards zero. */
8216 static void
8217 fcvtszd (sim_cpu *cpu)
8218 {
8219 unsigned sn = INSTR (9, 5);
8220 unsigned rd = INSTR (4, 0);
8221 /* TODO : check that this rounds toward zero. */
8222 double d = aarch64_get_FP_double (cpu, sn);
8223 int64_t value;
8224
8225 value = (int64_t) d;
8226
8227 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8228
8229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8230 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8231 }
8232
8233 static void
8234 do_fcvtzu (sim_cpu *cpu)
8235 {
8236 /* instr[31] = size: 32-bit (0), 64-bit (1)
8237 instr[30,23] = 00111100
8238 instr[22] = type: single (0)/ double (1)
8239 instr[21] = 0 ==> fixed-point (scaled by instr[15,10]), 1 ==> integer
8240 instr[20,16] = 11001
8241 instr[15,10] = precision
8242 instr[9,5] = Rs
8243 instr[4,0] = Rd. */
8244
8245 unsigned rs = INSTR (9, 5);
8246 unsigned rd = INSTR (4, 0);
8247
8248 NYI_assert (30, 23, 0x3C);
8249 NYI_assert (20, 16, 0x19);
8250
8251 if (INSTR (21, 21) != 1)
8252 /* Convert to fixed point. */
8253 HALT_NYI;
8254
8255 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8256 if (INSTR (31, 31))
8257 {
8258 /* Convert to unsigned 64-bit integer. */
8259 if (INSTR (22, 22))
8260 {
8261 double d = aarch64_get_FP_double (cpu, rs);
8262 uint64_t value = (uint64_t) d;
8263
8264 /* Do not raise an exception if we have reached ULONG_MAX. */
8265 if (value != (1ULL << 63))
8266 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8267
8268 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8269 }
8270 else
8271 {
8272 float f = aarch64_get_FP_float (cpu, rs);
8273 uint64_t value = (uint64_t) f;
8274
8275 /* Do not raise an exception if we have reached ULONG_MAX. */
8276 if (value != (1ULL << 63))
8277 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8278
8279 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8280 }
8281 }
8282 else
8283 {
8284 uint32_t value;
8285
8286 /* Convert to unsigned 32-bit integer. */
8287 if (INSTR (22, 22))
8288 {
8289 double d = aarch64_get_FP_double (cpu, rs);
8290
8291 value = (uint32_t) d;
8292 /* Do not raise an exception if we have reached UINT_MAX. */
8293 if (value != (1UL << 31))
8294 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8295 }
8296 else
8297 {
8298 float f = aarch64_get_FP_float (cpu, rs);
8299
8300 value = (uint32_t) f;
8301 /* Do not raise an exception if we have reached UINT_MAX. */
8302 if (value != (1UL << 31))
8303 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8304 }
8305
8306 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8307 }
8308 }
8309
8310 static void
8311 do_UCVTF (sim_cpu *cpu)
8312 {
8313 /* instr[31] = size: 32-bit (0), 64-bit (1)
8314 instr[30,23] = 001 1110 0
8315 instr[22] = type: single (0)/ double (1)
8316 instr[21] = 0 ==> fixed-point (scaled by instr[15,10]), 1 ==> integer
8317 instr[20,16] = 0 0011
8318 instr[15,10] = precision
8319 instr[9,5] = Rs
8320 instr[4,0] = Rd. */
8321
8322 unsigned rs = INSTR (9, 5);
8323 unsigned rd = INSTR (4, 0);
8324
8325 NYI_assert (30, 23, 0x3C);
8326 NYI_assert (20, 16, 0x03);
8327
8328 if (INSTR (21, 21) != 1)
8329 HALT_NYI;
8330
8331 /* FIXME: Add exception raising. */
8332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8333 if (INSTR (31, 31))
8334 {
8335 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8336
8337 if (INSTR (22, 22))
8338 aarch64_set_FP_double (cpu, rd, (double) value);
8339 else
8340 aarch64_set_FP_float (cpu, rd, (float) value);
8341 }
8342 else
8343 {
8344 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8345
8346 if (INSTR (22, 22))
8347 aarch64_set_FP_double (cpu, rd, (double) value);
8348 else
8349 aarch64_set_FP_float (cpu, rd, (float) value);
8350 }
8351 }
8352
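/* FMOV Xd, Vn.D[1] / FMOV Vd.D[1], Xn: move a 64 bit value between the
   upper half of a vector register and a general register.  */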
8353 static void
8354 float_vector_move (sim_cpu *cpu)
8355 {
8356 /* instr[31,17] == 100 1111 0101 0111
8357 instr[16] ==> direction 0=> to GR, 1=> from GR
8358 instr[15,10] => 00 0000, ow ==> UNALLOC
8359 instr[9,5] ==> source
8360 instr[4,0] ==> dest. */
8361
8362 unsigned rn = INSTR (9, 5);
8363 unsigned rd = INSTR (4, 0);
8364
8365 NYI_assert (31, 17, 0x4F57);
8366
8367 if (INSTR (15, 10) != 0)
8368 HALT_UNALLOC;
8369
8370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8371 if (INSTR (16, 16))
8372 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8373 else
8374 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8375 }
8376
8377 static void
8378 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8379 {
8380 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8381 instr[30] = 0
8382 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8383 instr[28,25] = 1111
8384 instr[24] = 0
8385 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8386 instr[21] = 1
8387 instr[20,19] = rmode
8388 instr[18,16] = opcode
8389 instr[15,10] = 10 0000 */
8390
8391 uint32_t rmode_opcode;
8392 uint32_t size_type;
8393 uint32_t type;
8394 uint32_t size;
8395 uint32_t S;
8396
8397 if (INSTR (31, 17) == 0x4F57)
8398 {
8399 float_vector_move (cpu);
8400 return;
8401 }
8402
8403 size = INSTR (31, 31);
8404 S = INSTR (29, 29);
8405 if (S != 0)
8406 HALT_UNALLOC;
8407
8408 type = INSTR (23, 22);
8409 if (type > 1)
8410 HALT_UNALLOC;
8411
8412 rmode_opcode = INSTR (20, 16);
8413 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8414
8415 switch (rmode_opcode)
8416 {
8417 case 2: /* SCVTF. */
8418 switch (size_type)
8419 {
8420 case 0: scvtf32 (cpu); return;
8421 case 1: scvtd32 (cpu); return;
8422 case 2: scvtf (cpu); return;
8423 case 3: scvtd (cpu); return;
8424 }
8425
8426 case 6: /* FMOV GR, Vec. */
8427 switch (size_type)
8428 {
8429 case 0: gfmovs (cpu); return;
8430 case 3: gfmovd (cpu); return;
8431 default: HALT_UNALLOC;
8432 }
8433
8434 case 7: /* FMOV vec, GR. */
8435 switch (size_type)
8436 {
8437 case 0: fgmovs (cpu); return;
8438 case 3: fgmovd (cpu); return;
8439 default: HALT_UNALLOC;
8440 }
8441
8442 case 24: /* FCVTZS. */
8443 switch (size_type)
8444 {
8445 case 0: fcvtszs32 (cpu); return;
8446 case 1: fcvtszd32 (cpu); return;
8447 case 2: fcvtszs (cpu); return;
8448 case 3: fcvtszd (cpu); return;
8449 }
8450
8451 case 25: do_fcvtzu (cpu); return;
8452 case 3: do_UCVTF (cpu); return;
8453
8454 case 0: /* FCVTNS. */
8455 case 1: /* FCVTNU. */
8456 case 4: /* FCVTAS. */
8457 case 5: /* FCVTAU. */
8458 case 8: /* FCVTPS. */
8459 case 9: /* FCVTPU. */
8460 case 16: /* FCVTMS. */
8461 case 17: /* FCVTMU. */
8462 default:
8463 HALT_NYI;
8464 }
8465 }
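
/* For example, FCVTZU X0, D1 has size = 1, type = 01 and
   rmode:opcode = 11001, so the dispatch above computes

       rmode_opcode = 25;
       size_type = (1 << 1) | 1;     -- 3 ==> 64-bit GReg, double

   and control reaches do_fcvtzu, which re-reads instr[31] and
   instr[22] to select the double-to-uint64 conversion.  */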
8466
8467 static void
8468 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8469 {
8470 uint32_t flags;
8471
8472 /* FIXME: Add exception raising. */
8473 if (isnan (fvalue1) || isnan (fvalue2))
8474 flags = C|V;
8475 else if (isinf (fvalue1) && isinf (fvalue2))
8476 {
8477 /* Subtracting two infinities may give a NaN. We only need to compare
8478 the signs, which we can get from isinf. */
8479 int result = isinf (fvalue1) - isinf (fvalue2);
8480
8481 if (result == 0)
8482 flags = Z|C;
8483 else if (result < 0)
8484 flags = N;
8485 else /* (result > 0). */
8486 flags = C;
8487 }
8488 else
8489 {
8490 float result = fvalue1 - fvalue2;
8491
8492 if (result == 0.0)
8493 flags = Z|C;
8494 else if (result < 0)
8495 flags = N;
8496 else /* (result > 0). */
8497 flags = C;
8498 }
8499
8500 aarch64_set_CPSR (cpu, flags);
8501 }
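
/* The mapping above reproduces the AArch64 FCMP flag rules:

       fcmp 1.0, 2.0   ==>  N      (less than)
       fcmp 2.0, 2.0   ==>  Z|C    (equal)
       fcmp 3.0, 2.0   ==>  C      (greater than)
       fcmp NaN, x     ==>  C|V    (unordered)  */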
8502
8503 static void
8504 fcmps (sim_cpu *cpu)
8505 {
8506 unsigned sm = INSTR (20, 16);
8507 unsigned sn = INSTR ( 9, 5);
8508
8509 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8510 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8511
8512 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8513 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8514 }
8515
8516 /* Float compare to zero -- Invalid Operation exception
8517 only on signaling NaNs. */
8518 static void
8519 fcmpzs (sim_cpu *cpu)
8520 {
8521 unsigned sn = INSTR ( 9, 5);
8522 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8523
8524 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8525 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8526 }
8527
8528 /* Float compare -- Invalid Operation exception on all NaNs. */
8529 static void
8530 fcmpes (sim_cpu *cpu)
8531 {
8532 unsigned sm = INSTR (20, 16);
8533 unsigned sn = INSTR ( 9, 5);
8534
8535 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8536 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8537
8538 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8539 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8540 }
8541
8542 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8543 static void
8544 fcmpzes (sim_cpu *cpu)
8545 {
8546 unsigned sn = INSTR ( 9, 5);
8547 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8548
8549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8550 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8551 }
8552
8553 static void
8554 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8555 {
8556 uint32_t flags;
8557
8558 /* FIXME: Add exception raising. */
8559 if (isnan (dval1) || isnan (dval2))
8560 flags = C|V;
8561 else if (isinf (dval1) && isinf (dval2))
8562 {
8563 /* Subtracting two infinities may give a NaN. We only need to compare
8564 the signs, which we can get from isinf. */
8565 int result = isinf (dval1) - isinf (dval2);
8566
8567 if (result == 0)
8568 flags = Z|C;
8569 else if (result < 0)
8570 flags = N;
8571 else /* (result > 0). */
8572 flags = C;
8573 }
8574 else
8575 {
8576 double result = dval1 - dval2;
8577
8578 if (result == 0.0)
8579 flags = Z|C;
8580 else if (result < 0)
8581 flags = N;
8582 else /* (result > 0). */
8583 flags = C;
8584 }
8585
8586 aarch64_set_CPSR (cpu, flags);
8587 }
8588
8589 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8590 static void
8591 fcmpd (sim_cpu *cpu)
8592 {
8593 unsigned sm = INSTR (20, 16);
8594 unsigned sn = INSTR ( 9, 5);
8595
8596 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8597 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8598
8599 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8600 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8601 }
8602
8603 /* Double compare to zero -- Invalid Operation exception
8604 only on signaling NaNs. */
8605 static void
8606 fcmpzd (sim_cpu *cpu)
8607 {
8608 unsigned sn = INSTR ( 9, 5);
8609 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8610
8611 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8612 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8613 }
8614
8615 /* Double compare -- Invalid Operation exception on all NaNs. */
8616 static void
8617 fcmped (sim_cpu *cpu)
8618 {
8619 unsigned sm = INSTR (20, 16);
8620 unsigned sn = INSTR ( 9, 5);
8621
8622 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8623 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8624
8625 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8626 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8627 }
8628
8629 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8630 static void
8631 fcmpzed (sim_cpu *cpu)
8632 {
8633 unsigned sn = INSTR ( 9, 5);
8634 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8635
8636 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8637 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8638 }
8639
8640 static void
8641 dexSimpleFPCompare (sim_cpu *cpu)
8642 {
8643 /* assert instr[28,25] == 1111
8644 instr[30:24:21:13,10] = 0011000
8645 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8646 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8647 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8648 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8649 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8650 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8651 ow ==> UNALLOC */
8652 uint32_t dispatch;
8653 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8654 uint32_t type = INSTR (23, 22);
8655 uint32_t op = INSTR (15, 14);
8656 uint32_t op2_2_0 = INSTR (2, 0);
8657
8658 if (op2_2_0 != 0)
8659 HALT_UNALLOC;
8660
8661 if (M_S != 0)
8662 HALT_UNALLOC;
8663
8664 if (type > 1)
8665 HALT_UNALLOC;
8666
8667 if (op != 0)
8668 HALT_UNALLOC;
8669
8670 /* dispatch on type and top 2 bits of opcode. */
8671 dispatch = (type << 2) | INSTR (4, 3);
8672
8673 switch (dispatch)
8674 {
8675 case 0: fcmps (cpu); return;
8676 case 1: fcmpzs (cpu); return;
8677 case 2: fcmpes (cpu); return;
8678 case 3: fcmpzes (cpu); return;
8679 case 4: fcmpd (cpu); return;
8680 case 5: fcmpzd (cpu); return;
8681 case 6: fcmped (cpu); return;
8682 case 7: fcmpzed (cpu); return;
8683 }
8684 }
8685
8686 static void
8687 do_scalar_FADDP (sim_cpu *cpu)
8688 {
8689 /* instr [31,23] = 0111 1110 0
8690 instr [22] = single(0)/double(1)
8691 instr [21,10] = 11 0000 1101 10
8692 instr [9,5] = Fn
8693 instr [4,0] = Fd. */
8694
8695 unsigned Fn = INSTR (9, 5);
8696 unsigned Fd = INSTR (4, 0);
8697
8698 NYI_assert (31, 23, 0x0FC);
8699 NYI_assert (21, 10, 0xC36);
8700
8701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8702 if (INSTR (22, 22))
8703 {
8704 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8705 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8706
8707 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8708 }
8709 else
8710 {
8711 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8712 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8713
8714 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8715 }
8716 }
8717
8718 /* Floating point absolute difference. */
8719
8720 static void
8721 do_scalar_FABD (sim_cpu *cpu)
8722 {
8723 /* instr [31,23] = 0111 1110 1
8724 instr [22] = float(0)/double(1)
8725 instr [21] = 1
8726 instr [20,16] = Rm
8727 instr [15,10] = 1101 01
8728 instr [9, 5] = Rn
8729 instr [4, 0] = Rd. */
8730
8731 unsigned rm = INSTR (20, 16);
8732 unsigned rn = INSTR (9, 5);
8733 unsigned rd = INSTR (4, 0);
8734
8735 NYI_assert (31, 23, 0x0FD);
8736 NYI_assert (21, 21, 1);
8737 NYI_assert (15, 10, 0x35);
8738
8739 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8740 if (INSTR (22, 22))
8741 aarch64_set_FP_double (cpu, rd,
8742 fabs (aarch64_get_FP_double (cpu, rn)
8743 - aarch64_get_FP_double (cpu, rm)));
8744 else
8745 aarch64_set_FP_float (cpu, rd,
8746 fabsf (aarch64_get_FP_float (cpu, rn)
8747 - aarch64_get_FP_float (cpu, rm)));
8748 }
8749
8750 static void
8751 do_scalar_CMGT (sim_cpu *cpu)
8752 {
8753 /* instr [31,21] = 0101 1110 111
8754 instr [20,16] = Rm
8755 instr [15,10] = 00 1101
8756 instr [9, 5] = Rn
8757 instr [4, 0] = Rd. */
8758
8759 unsigned rm = INSTR (20, 16);
8760 unsigned rn = INSTR (9, 5);
8761 unsigned rd = INSTR (4, 0);
8762
8763 NYI_assert (31, 21, 0x2F7);
8764 NYI_assert (15, 10, 0x0D);
8765
8766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8767 aarch64_set_vec_u64 (cpu, rd, 0,
8768 aarch64_get_vec_s64 (cpu, rn, 0) >
8769 aarch64_get_vec_s64 (cpu, rm, 0) ? -1L : 0L);
8770 }
8771
8772 static void
8773 do_scalar_USHR (sim_cpu *cpu)
8774 {
8775 /* instr [31,23] = 0111 1111 0
8776 instr [22,16] = shift amount
8777 instr [15,10] = 0000 01
8778 instr [9, 5] = Rn
8779 instr [4, 0] = Rd. */
8780
8781 unsigned amount = 128 - INSTR (22, 16);
8782 unsigned rn = INSTR (9, 5);
8783 unsigned rd = INSTR (4, 0);
8784
8785 NYI_assert (31, 23, 0x0FE);
8786 NYI_assert (15, 10, 0x01);
8787
8788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8789 aarch64_set_vec_u64 (cpu, rd, 0,
8790 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8791 }
8792
8793 static void
8794 do_scalar_SSHL (sim_cpu *cpu)
8795 {
8796 /* instr [31,21] = 0101 1110 111
8797 instr [20,16] = Rm
8798 instr [15,10] = 0100 01
8799 instr [9, 5] = Rn
8800 instr [4, 0] = Rd. */
8801
8802 unsigned rm = INSTR (20, 16);
8803 unsigned rn = INSTR (9, 5);
8804 unsigned rd = INSTR (4, 0);
8805 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8806
8807 NYI_assert (31, 21, 0x2F7);
8808 NYI_assert (15, 10, 0x11);
8809
8810 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8811 if (shift >= 0)
8812 aarch64_set_vec_s64 (cpu, rd, 0,
8813 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8814 else
8815 aarch64_set_vec_s64 (cpu, rd, 0,
8816 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8817 }
8818
8819 static void
8820 do_scalar_shift (sim_cpu *cpu)
8821 {
8822 /* instr [31,23] = 0101 1111 0
8823 instr [22,16] = shift amount
8824 instr [15,10] = 0101 01 [SHL]
8825 instr [15,10] = 0000 01 [SSHR]
8826 instr [9, 5] = Rn
8827 instr [4, 0] = Rd. */
8828
8829 unsigned rn = INSTR (9, 5);
8830 unsigned rd = INSTR (4, 0);
8831 unsigned amount;
8832
8833 NYI_assert (31, 23, 0x0BE);
8834
8835 if (INSTR (22, 22) == 0)
8836 HALT_UNALLOC;
8837
8838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8839 switch (INSTR (15, 10))
8840 {
8841 case 0x01: /* SSHR */
8842 amount = 128 - INSTR (22, 16);
8843 aarch64_set_vec_s64 (cpu, rd, 0,
8844 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8845 return;
8846 case 0x15: /* SHL */
8847 amount = INSTR (22, 16) - 64;
8848 aarch64_set_vec_u64 (cpu, rd, 0,
8849 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8850 return;
8851 default:
8852 HALT_NYI;
8853 }
8854 }
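
/* The shift amount above is decoded from the immh:immb field in
   instr[22,16]; for these 64-bit scalar forms immh always has its
   top bit (instr[22]) set.  For example:

       INSTR (22, 16) = 0x78    -- SSHR amount = 128 - 0x78 = 8
       INSTR (22, 16) = 0x48    -- SHL  amount = 0x48 - 64  = 8

   so SSHR counts the amount down from 128 and SHL up from 64.  */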
8855
8856 /* FCMEQ FCMGT FCMGE. */
8857 static void
8858 do_scalar_FCM (sim_cpu *cpu)
8859 {
8860 /* instr [31,30] = 01
8861 instr [29] = U
8862 instr [28,24] = 1 1110
8863 instr [23] = E
8864 instr [22] = size
8865 instr [21] = 1
8866 instr [20,16] = Rm
8867 instr [15,12] = 1110
8868 instr [11] = AC
8869 instr [10] = 1
8870 instr [9, 5] = Rn
8871 instr [4, 0] = Rd. */
8872
8873 unsigned rm = INSTR (20, 16);
8874 unsigned rn = INSTR (9, 5);
8875 unsigned rd = INSTR (4, 0);
8876 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8877 unsigned result;
8878 float val1;
8879 float val2;
8880
8881 NYI_assert (31, 30, 1);
8882 NYI_assert (28, 24, 0x1E);
8883 NYI_assert (21, 21, 1);
8884 NYI_assert (15, 12, 0xE);
8885 NYI_assert (10, 10, 1);
8886
8887 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8888 if (INSTR (22, 22))
8889 {
8890 double val1 = aarch64_get_FP_double (cpu, rn);
8891 double val2 = aarch64_get_FP_double (cpu, rm);
8892
8893 switch (EUac)
8894 {
8895 case 0: /* 000 */
8896 result = val1 == val2;
8897 break;
8898
8899 case 3: /* 011 */
8900 val1 = fabs (val1);
8901 val2 = fabs (val2);
8902 /* Fall through. */
8903 case 2: /* 010 */
8904 result = val1 >= val2;
8905 break;
8906
8907 case 7: /* 111 */
8908 val1 = fabs (val1);
8909 val2 = fabs (val2);
8910 /* Fall through. */
8911 case 6: /* 110 */
8912 result = val1 > val2;
8913 break;
8914
8915 default:
8916 HALT_UNALLOC;
8917 }
8918
8919 aarch64_set_vec_u64 (cpu, rd, 0, result ? -1 : 0);
8920 return;
8921 }
8922
8923 val1 = aarch64_get_FP_float (cpu, rn);
8924 val2 = aarch64_get_FP_float (cpu, rm);
8925
8926 switch (EUac)
8927 {
8928 case 0: /* 000 */
8929 result = val1 == val2;
8930 break;
8931
8932 case 3: /* 011 */
8933 val1 = fabsf (val1);
8934 val2 = fabsf (val2);
8935 /* Fall through. */
8936 case 2: /* 010 */
8937 result = val1 >= val2;
8938 break;
8939
8940 case 7: /* 111 */
8941 val1 = fabsf (val1);
8942 val2 = fabsf (val2);
8943 /* Fall through. */
8944 case 6: /* 110 */
8945 result = val1 > val2;
8946 break;
8947
8948 default:
8949 HALT_UNALLOC;
8950 }
8951
8952 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8953 }
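
/* For reference, the EUac value computed above selects the compare:

       EUac = 0  (E=0, U=0, ac=0)  ==>  FCMEQ  ==
       EUac = 2  (E=0, U=1, ac=0)  ==>  FCMGE  >=
       EUac = 3  (E=0, U=1, ac=1)  ==>  FACGE  abs >=
       EUac = 6  (E=1, U=1, ac=0)  ==>  FCMGT  >
       EUac = 7  (E=1, U=1, ac=1)  ==>  FACGT  abs >

   with all other encodings halting as UNALLOC.  */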
8954
8955 /* An alias of DUP. */
8956 static void
8957 do_scalar_MOV (sim_cpu *cpu)
8958 {
8959 /* instr [31,21] = 0101 1110 000
8960 instr [20,16] = imm5
8961 instr [15,10] = 0000 01
8962 instr [9, 5] = Rn
8963 instr [4, 0] = Rd. */
8964
8965 unsigned rn = INSTR (9, 5);
8966 unsigned rd = INSTR (4, 0);
8967 unsigned index;
8968
8969 NYI_assert (31, 21, 0x2F0);
8970 NYI_assert (15, 10, 0x01);
8971
8972 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8973 if (INSTR (16, 16))
8974 {
8975 /* 8-bit. */
8976 index = INSTR (20, 17);
8977 aarch64_set_vec_u8
8978 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8979 }
8980 else if (INSTR (17, 17))
8981 {
8982 /* 16-bit. */
8983 index = INSTR (20, 18);
8984 aarch64_set_vec_u16
8985 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8986 }
8987 else if (INSTR (18, 18))
8988 {
8989 /* 32-bit. */
8990 index = INSTR (20, 19);
8991 aarch64_set_vec_u32
8992 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8993 }
8994 else if (INSTR (19, 19))
8995 {
8996 /* 64-bit. */
8997 index = INSTR (20, 20);
8998 aarch64_set_vec_u64
8999 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9000 }
9001 else
9002 HALT_UNALLOC;
9003 }
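
/* The imm5 decode above follows the usual AdvSIMD rule: the lowest
   set bit of instr[20,16] gives the element size and the bits above
   it give the element index.  For example:

       imm5 = 00101    -- bit 16 set ==> byte, index = 0010 = 2
       imm5 = 01000    -- bit 19 set ==> 64-bit, index = bit 20 = 0  */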
9004
9005 static void
9006 do_scalar_NEG (sim_cpu *cpu)
9007 {
9008 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9009 instr [9, 5] = Rn
9010 instr [4, 0] = Rd. */
9011
9012 unsigned rn = INSTR (9, 5);
9013 unsigned rd = INSTR (4, 0);
9014
9015 NYI_assert (31, 10, 0x1FB82E);
9016
9017 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9018 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9019 }
9020
9021 static void
9022 do_scalar_USHL (sim_cpu *cpu)
9023 {
9024 /* instr [31,21] = 0111 1110 111
9025 instr [20,16] = Rm
9026 instr [15,10] = 0100 01
9027 instr [9, 5] = Rn
9028 instr [4, 0] = Rd. */
9029
9030 unsigned rm = INSTR (20, 16);
9031 unsigned rn = INSTR (9, 5);
9032 unsigned rd = INSTR (4, 0);
9033 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9034
9035 NYI_assert (31, 21, 0x3F7);
9036 NYI_assert (15, 10, 0x11);
9037
9038 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9039 if (shift >= 0)
9040 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9041 else
9042 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9043 }
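
/* In both do_scalar_SSHL and do_scalar_USHL the shift count is read
   as a signed byte from the bottom of Rm, so one encoding covers
   both directions.  For example:

       int8_t shift = 3;     -- Rd = Rn << 3
       int8_t shift = -3;    -- Rd = Rn >> 3 (arithmetic for SSHL,
                                              logical for USHL)  */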
9044
9045 static void
9046 do_double_add (sim_cpu *cpu)
9047 {
9048 /* instr [31,21] = 0101 1110 111
9049 instr [20,16] = Fn
9050 instr [15,10] = 1000 01
9051 instr [9,5] = Fm
9052 instr [4,0] = Fd. */
9053 unsigned Fd;
9054 unsigned Fm;
9055 unsigned Fn;
9056 double val1;
9057 double val2;
9058
9059 NYI_assert (31, 21, 0x2F7);
9060 NYI_assert (15, 10, 0x21);
9061
9062 Fd = INSTR (4, 0);
9063 Fm = INSTR (9, 5);
9064 Fn = INSTR (20, 16);
9065
9066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9067 val1 = aarch64_get_FP_double (cpu, Fm);
9068 val2 = aarch64_get_FP_double (cpu, Fn);
9069
9070 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9071 }
9072
9073 static void
9074 do_scalar_UCVTF (sim_cpu *cpu)
9075 {
9076 /* instr [31,23] = 0111 1110 0
9077 instr [22] = single(0)/double(1)
9078 instr [21,10] = 10 0001 1101 10
9079 instr [9,5] = rn
9080 instr [4,0] = rd. */
9081
9082 unsigned rn = INSTR (9, 5);
9083 unsigned rd = INSTR (4, 0);
9084
9085 NYI_assert (31, 23, 0x0FC);
9086 NYI_assert (21, 10, 0x876);
9087
9088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9089 if (INSTR (22, 22))
9090 {
9091 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9092
9093 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9094 }
9095 else
9096 {
9097 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9098
9099 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9100 }
9101 }
9102
9103 static void
9104 do_scalar_vec (sim_cpu *cpu)
9105 {
9106 /* instr [30] = 1. */
9107 /* instr [28,25] = 1111. */
9108 switch (INSTR (31, 23))
9109 {
9110 case 0xBC:
9111 switch (INSTR (15, 10))
9112 {
9113 case 0x01: do_scalar_MOV (cpu); return;
9114 case 0x39: do_scalar_FCM (cpu); return;
9115 case 0x3B: do_scalar_FCM (cpu); return;
9116 }
9117 break;
9118
9119 case 0xBE: do_scalar_shift (cpu); return;
9120
9121 case 0xFC:
9122 switch (INSTR (15, 10))
9123 {
9124 case 0x36:
9125 switch (INSTR (21, 16))
9126 {
9127 case 0x30: do_scalar_FADDP (cpu); return;
9128 case 0x21: do_scalar_UCVTF (cpu); return;
9129 }
9130 HALT_NYI;
9131 case 0x39: do_scalar_FCM (cpu); return;
9132 case 0x3B: do_scalar_FCM (cpu); return;
9133 }
9134 break;
9135
9136 case 0xFD:
9137 switch (INSTR (15, 10))
9138 {
9139 case 0x0D: do_scalar_CMGT (cpu); return;
9140 case 0x11: do_scalar_USHL (cpu); return;
9141 case 0x2E: do_scalar_NEG (cpu); return;
9142 case 0x35: do_scalar_FABD (cpu); return;
9143 case 0x39: do_scalar_FCM (cpu); return;
9144 case 0x3B: do_scalar_FCM (cpu); return;
9145 default:
9146 HALT_NYI;
9147 }
9148
9149 case 0xFE: do_scalar_USHR (cpu); return;
9150
9151 case 0xBD:
9152 switch (INSTR (15, 10))
9153 {
9154 case 0x21: do_double_add (cpu); return;
9155 case 0x11: do_scalar_SSHL (cpu); return;
9156 default:
9157 HALT_NYI;
9158 }
9159
9160 default:
9161 HALT_NYI;
9162 }
9163 }
9164
9165 static void
9166 dexAdvSIMD1 (sim_cpu *cpu)
9167 {
9168 /* instr [28,25] = 1 111. */
9169
9170 /* We are currently only interested in the basic
9171 scalar fp routines which all have bit 30 = 0. */
9172 if (INSTR (30, 30))
9173 do_scalar_vec (cpu);
9174
9175 /* instr[24] is set for FP data processing 3-source and clear for
9176 all other basic scalar fp instruction groups. */
9177 else if (INSTR (24, 24))
9178 dexSimpleFPDataProc3Source (cpu);
9179
9180 /* instr[21] is clear for floating <-> fixed conversions and set for
9181 all other basic scalar fp instruction groups. */
9182 else if (!INSTR (21, 21))
9183 dexSimpleFPFixedConvert (cpu);
9184
9185 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9186 11 ==> cond select, 00 ==> other. */
9187 else
9188 switch (INSTR (11, 10))
9189 {
9190 case 1: dexSimpleFPCondCompare (cpu); return;
9191 case 2: dexSimpleFPDataProc2Source (cpu); return;
9192 case 3: dexSimpleFPCondSelect (cpu); return;
9193
9194 default:
9195 /* Now an ordered cascade of tests.
9196 FP immediate has instr [12] == 1.
9197 FP compare has instr [13] == 1.
9198 FP Data Proc 1 Source has instr [14] == 1.
9199 FP floating <--> integer conversions has instr [15] == 0. */
9200 if (INSTR (12, 12))
9201 dexSimpleFPImmediate (cpu);
9202
9203 else if (INSTR (13, 13))
9204 dexSimpleFPCompare (cpu);
9205
9206 else if (INSTR (14, 14))
9207 dexSimpleFPDataProc1Source (cpu);
9208
9209 else if (!INSTR (15, 15))
9210 dexSimpleFPIntegerConvert (cpu);
9211
9212 else
9213 /* If we get here then instr[15] == 1 which means UNALLOC. */
9214 HALT_UNALLOC;
9215 }
9216 }
9217
9218 /* PC relative addressing. */
9219
9220 static void
9221 pcadr (sim_cpu *cpu)
9222 {
9223 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9224 instr[30,29] = immlo
9225 instr[23,5] = immhi. */
9226 uint64_t address;
9227 unsigned rd = INSTR (4, 0);
9228 uint32_t isPage = INSTR (31, 31);
9229 union { uint64_t u64; int64_t s64; } imm;
9230 uint64_t offset;
9231
9232 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9233 offset = imm.u64;
9234 offset = (offset << 2) | INSTR (30, 29);
9235
9236 address = aarch64_get_PC (cpu);
9237
9238 if (isPage)
9239 {
9240 offset <<= 12;
9241 address &= ~0xfff;
9242 }
9243
9244 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9245 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9246 }
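
/* A worked ADRP example: with PC = 0x400123, instr[31] = 1 and a
   combined immhi:immlo of 1, the code above computes

       offset = 1 << 12;              -- 0x1000
       address = 0x400123 & ~0xfff;   -- 0x400000

   so Rd receives 0x401000, the base of the following 4KB page.  */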
9247
9248 /* Specific decode and execute for group Data Processing Immediate. */
9249
9250 static void
9251 dexPCRelAddressing (sim_cpu *cpu)
9252 {
9253 /* assert instr[28,24] = 10000. */
9254 pcadr (cpu);
9255 }
9256
9257 /* Immediate logical.
9258 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9259 16, 32 or 64 bit sequence pulled out at decode and possibly
9260 inverting it.
9261
9262 N.B. the output register (dest) can normally be Xn or SP;
9263 the exception occurs for flag setting instructions, which may
9264 only use Xn for the output (dest). The input register can
9265 never be SP. */
9266
9267 /* 32 bit and immediate. */
9268 static void
9269 and32 (sim_cpu *cpu, uint32_t bimm)
9270 {
9271 unsigned rn = INSTR (9, 5);
9272 unsigned rd = INSTR (4, 0);
9273
9274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9275 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9276 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9277 }
9278
9279 /* 64 bit and immediate. */
9280 static void
9281 and64 (sim_cpu *cpu, uint64_t bimm)
9282 {
9283 unsigned rn = INSTR (9, 5);
9284 unsigned rd = INSTR (4, 0);
9285
9286 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9287 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9288 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9289 }
9290
9291 /* 32 bit and immediate set flags. */
9292 static void
9293 ands32 (sim_cpu *cpu, uint32_t bimm)
9294 {
9295 unsigned rn = INSTR (9, 5);
9296 unsigned rd = INSTR (4, 0);
9297
9298 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9299 uint32_t value2 = bimm;
9300
9301 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9302 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9303 set_flags_for_binop32 (cpu, value1 & value2);
9304 }
9305
9306 /* 64 bit and immediate set flags. */
9307 static void
9308 ands64 (sim_cpu *cpu, uint64_t bimm)
9309 {
9310 unsigned rn = INSTR (9, 5);
9311 unsigned rd = INSTR (4, 0);
9312
9313 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9314 uint64_t value2 = bimm;
9315
9316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9317 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9318 set_flags_for_binop64 (cpu, value1 & value2);
9319 }
9320
9321 /* 32 bit exclusive or immediate. */
9322 static void
9323 eor32 (sim_cpu *cpu, uint32_t bimm)
9324 {
9325 unsigned rn = INSTR (9, 5);
9326 unsigned rd = INSTR (4, 0);
9327
9328 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9329 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9330 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9331 }
9332
9333 /* 64 bit exclusive or immediate. */
9334 static void
9335 eor64 (sim_cpu *cpu, uint64_t bimm)
9336 {
9337 unsigned rn = INSTR (9, 5);
9338 unsigned rd = INSTR (4, 0);
9339
9340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9341 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9342 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9343 }
9344
9345 /* 32 bit or immediate. */
9346 static void
9347 orr32 (sim_cpu *cpu, uint32_t bimm)
9348 {
9349 unsigned rn = INSTR (9, 5);
9350 unsigned rd = INSTR (4, 0);
9351
9352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9353 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9354 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9355 }
9356
9357 /* 64 bit or immediate. */
9358 static void
9359 orr64 (sim_cpu *cpu, uint64_t bimm)
9360 {
9361 unsigned rn = INSTR (9, 5);
9362 unsigned rd = INSTR (4, 0);
9363
9364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9365 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9366 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9367 }
9368
9369 /* Logical shifted register.
9370 These allow an optional LSL, ASR, LSR or ROR to the second source
9371 register with a count up to the register bit count.
9372 N.B. register args may not be SP. */
9373
9374 /* 32 bit AND shifted register. */
9375 static void
9376 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9377 {
9378 unsigned rm = INSTR (20, 16);
9379 unsigned rn = INSTR (9, 5);
9380 unsigned rd = INSTR (4, 0);
9381
9382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9383 aarch64_set_reg_u64
9384 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9385 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9386 }
9387
9388 /* 64 bit AND shifted register. */
9389 static void
9390 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9391 {
9392 unsigned rm = INSTR (20, 16);
9393 unsigned rn = INSTR (9, 5);
9394 unsigned rd = INSTR (4, 0);
9395
9396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9397 aarch64_set_reg_u64
9398 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9399 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9400 }
9401
9402 /* 32 bit AND shifted register setting flags. */
9403 static void
9404 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9405 {
9406 unsigned rm = INSTR (20, 16);
9407 unsigned rn = INSTR (9, 5);
9408 unsigned rd = INSTR (4, 0);
9409
9410 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9411 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9412 shift, count);
9413
9414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9415 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9416 set_flags_for_binop32 (cpu, value1 & value2);
9417 }
9418
9419 /* 64 bit AND shifted register setting flags. */
9420 static void
9421 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9422 {
9423 unsigned rm = INSTR (20, 16);
9424 unsigned rn = INSTR (9, 5);
9425 unsigned rd = INSTR (4, 0);
9426
9427 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9428 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9429 shift, count);
9430
9431 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9432 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9433 set_flags_for_binop64 (cpu, value1 & value2);
9434 }
9435
9436 /* 32 bit BIC shifted register. */
9437 static void
9438 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9439 {
9440 unsigned rm = INSTR (20, 16);
9441 unsigned rn = INSTR (9, 5);
9442 unsigned rd = INSTR (4, 0);
9443
9444 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9445 aarch64_set_reg_u64
9446 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9447 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9448 }
9449
9450 /* 64 bit BIC shifted register. */
9451 static void
9452 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9453 {
9454 unsigned rm = INSTR (20, 16);
9455 unsigned rn = INSTR (9, 5);
9456 unsigned rd = INSTR (4, 0);
9457
9458 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9459 aarch64_set_reg_u64
9460 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9461 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9462 }
9463
9464 /* 32 bit BIC shifted register setting flags. */
9465 static void
9466 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9467 {
9468 unsigned rm = INSTR (20, 16);
9469 unsigned rn = INSTR (9, 5);
9470 unsigned rd = INSTR (4, 0);
9471
9472 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9473 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9474 shift, count);
9475
9476 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9477 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9478 set_flags_for_binop32 (cpu, value1 & value2);
9479 }
9480
9481 /* 64 bit BIC shifted register setting flags. */
9482 static void
9483 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9484 {
9485 unsigned rm = INSTR (20, 16);
9486 unsigned rn = INSTR (9, 5);
9487 unsigned rd = INSTR (4, 0);
9488
9489 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9490 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9491 shift, count);
9492
9493 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9494 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9495 set_flags_for_binop64 (cpu, value1 & value2);
9496 }
9497
9498 /* 32 bit EON shifted register. */
9499 static void
9500 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9501 {
9502 unsigned rm = INSTR (20, 16);
9503 unsigned rn = INSTR (9, 5);
9504 unsigned rd = INSTR (4, 0);
9505
9506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9507 aarch64_set_reg_u64
9508 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9509 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9510 }
9511
9512 /* 64 bit EON shifted register. */
9513 static void
9514 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9515 {
9516 unsigned rm = INSTR (20, 16);
9517 unsigned rn = INSTR (9, 5);
9518 unsigned rd = INSTR (4, 0);
9519
9520 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9521 aarch64_set_reg_u64
9522 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9523 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9524 }
9525
9526 /* 32 bit EOR shifted register. */
9527 static void
9528 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9529 {
9530 unsigned rm = INSTR (20, 16);
9531 unsigned rn = INSTR (9, 5);
9532 unsigned rd = INSTR (4, 0);
9533
9534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9535 aarch64_set_reg_u64
9536 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9537 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9538 }
9539
9540 /* 64 bit EOR shifted register. */
9541 static void
9542 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9543 {
9544 unsigned rm = INSTR (20, 16);
9545 unsigned rn = INSTR (9, 5);
9546 unsigned rd = INSTR (4, 0);
9547
9548 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9549 aarch64_set_reg_u64
9550 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9551 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9552 }
9553
9554 /* 32 bit ORR shifted register. */
9555 static void
9556 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9557 {
9558 unsigned rm = INSTR (20, 16);
9559 unsigned rn = INSTR (9, 5);
9560 unsigned rd = INSTR (4, 0);
9561
9562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9563 aarch64_set_reg_u64
9564 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9565 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9566 }
9567
9568 /* 64 bit ORR shifted register. */
9569 static void
9570 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9571 {
9572 unsigned rm = INSTR (20, 16);
9573 unsigned rn = INSTR (9, 5);
9574 unsigned rd = INSTR (4, 0);
9575
9576 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9577 aarch64_set_reg_u64
9578 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9579 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9580 }
9581
9582 /* 32 bit ORN shifted register. */
9583 static void
9584 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9585 {
9586 unsigned rm = INSTR (20, 16);
9587 unsigned rn = INSTR (9, 5);
9588 unsigned rd = INSTR (4, 0);
9589
9590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9591 aarch64_set_reg_u64
9592 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9593 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9594 }
9595
9596 /* 64 bit ORN shifted register. */
9597 static void
9598 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9599 {
9600 unsigned rm = INSTR (20, 16);
9601 unsigned rn = INSTR (9, 5);
9602 unsigned rd = INSTR (4, 0);
9603
9604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9605 aarch64_set_reg_u64
9606 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9607 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9608 }
9609
9610 static void
9611 dexLogicalImmediate (sim_cpu *cpu)
9612 {
9613 /* assert instr[28,23] = 100100
9614 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9615 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9616 instr[22] = N : used to construct immediate mask
9617 instr[21,16] = immr
9618 instr[15,10] = imms
9619 instr[9,5] = Rn
9620 instr[4,0] = Rd */
9621
9622 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9623 uint32_t size = INSTR (31, 31);
9624 uint32_t N = INSTR (22, 22);
9625 /* uint32_t immr = INSTR (21, 16); */
9626 /* uint32_t imms = INSTR (15, 10); */
9627 uint32_t index = INSTR (22, 10);
9628 uint64_t bimm64 = LITable [index];
9629 uint32_t dispatch = INSTR (30, 29);
9630
9631 if (~size & N)
9632 HALT_UNALLOC;
9633
9634 if (!bimm64)
9635 HALT_UNALLOC;
9636
9637 if (size == 0)
9638 {
9639 uint32_t bimm = (uint32_t) bimm64;
9640
9641 switch (dispatch)
9642 {
9643 case 0: and32 (cpu, bimm); return;
9644 case 1: orr32 (cpu, bimm); return;
9645 case 2: eor32 (cpu, bimm); return;
9646 case 3: ands32 (cpu, bimm); return;
9647 }
9648 }
9649 else
9650 {
9651 switch (dispatch)
9652 {
9653 case 0: and64 (cpu, bimm64); return;
9654 case 1: orr64 (cpu, bimm64); return;
9655 case 2: eor64 (cpu, bimm64); return;
9656 case 3: ands64 (cpu, bimm64); return;
9657 }
9658 }
9659 HALT_UNALLOC;
9660 }
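
/* To illustrate the LITable lookup above: AND W0, W1, #0xff is
   encoded with N = 0, immr = 000000 and imms = 000111, giving

       index = INSTR (22, 10);      -- N:immr:imms = 7
       bimm64 = LITable [index];    -- eight ones, replicated to fill
                                       the register

   The 32-bit path then truncates bimm64 with a (uint32_t) cast.
   Field combinations that decode to no valid mask leave a zero
   table entry, which is why a zero bimm64 halts as UNALLOC.  */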
9661
9662 /* Immediate move.
9663 The uimm argument is a 16 bit value to be inserted into the
9664 target register; the pos argument locates the 16 bit word in the
9665 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9666 3} for 64 bit.
9667 N.B. the register arg may not be SP, so it should be
9668 accessed using the setGZRegisterXXX accessors. */
9669
9670 /* 32 bit move 16 bit immediate zero remaining shorts. */
9671 static void
9672 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9673 {
9674 unsigned rd = INSTR (4, 0);
9675
9676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9677 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9678 }
9679
9680 /* 64 bit move 16 bit immediate zero remaining shorts. */
9681 static void
9682 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9683 {
9684 unsigned rd = INSTR (4, 0);
9685
9686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9687 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9688 }
9689
9690 /* 32 bit move 16 bit immediate negated. */
9691 static void
9692 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9693 {
9694 unsigned rd = INSTR (4, 0);
9695
9696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9697 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9698 }
9699
9700 /* 64 bit move 16 bit immediate negated. */
9701 static void
9702 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9703 {
9704 unsigned rd = INSTR (4, 0);
9705
9706 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9707 aarch64_set_reg_u64
9708 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9709 ^ 0xffffffffffffffffULL));
9710 }
9711
9712 /* 32 bit move 16 bit immediate keep remaining shorts. */
9713 static void
9714 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9715 {
9716 unsigned rd = INSTR (4, 0);
9717 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9718 uint32_t value = val << (pos * 16);
9719 uint32_t mask = ~(0xffffU << (pos * 16));
9720
9721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9722 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9723 }
9724
9725 /* 64 bit move 16 bit immediate keep remaining shorts. */
9726 static void
9727 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9728 {
9729 unsigned rd = INSTR (4, 0);
9730 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9731 uint64_t value = (uint64_t) val << (pos * 16);
9732 uint64_t mask = ~(0xffffULL << (pos * 16));
9733
9734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9735 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9736 }
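
/* Together these routines let a 64-bit constant be built 16 bits at
   a time.  For example, loading 0x1234567890ab:

       movz x0, #0x90ab              -- x0 = 0x00000000000090ab
       movk x0, #0x5678, lsl #16     -- x0 = 0x00000000567890ab
       movk x0, #0x1234, lsl #32     -- x0 = 0x00001234567890ab

   where each movk keeps the shorts already written by masking with
   ~(0xffffULL << (pos * 16)).  */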
9737
9738 static void
9739 dexMoveWideImmediate (sim_cpu *cpu)
9740 {
9741 /* assert instr[28:23] = 100101
9742 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9743 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9744 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9745 instr[20,5] = uimm16
9746 instr[4,0] = Rd */
9747
9748 /* N.B. the (multiple of 16) shift is applied by the called routine;
9749 we just pass the multiplier. */
9750
9751 uint32_t imm;
9752 uint32_t size = INSTR (31, 31);
9753 uint32_t op = INSTR (30, 29);
9754 uint32_t shift = INSTR (22, 21);
9755
9756 /* 32 bit can only shift 0 or 1 lot of 16.
9757 Anything else is an unallocated instruction. */
9758 if (size == 0 && (shift > 1))
9759 HALT_UNALLOC;
9760
9761 if (op == 1)
9762 HALT_UNALLOC;
9763
9764 imm = INSTR (20, 5);
9765
9766 if (size == 0)
9767 {
9768 if (op == 0)
9769 movn32 (cpu, imm, shift);
9770 else if (op == 2)
9771 movz32 (cpu, imm, shift);
9772 else
9773 movk32 (cpu, imm, shift);
9774 }
9775 else
9776 {
9777 if (op == 0)
9778 movn64 (cpu, imm, shift);
9779 else if (op == 2)
9780 movz64 (cpu, imm, shift);
9781 else
9782 movk64 (cpu, imm, shift);
9783 }
9784 }
9785
9786 /* Bitfield operations.
9787 These take a pair of bit positions r and s which are in {0..31}
9788 or {0..63} depending on the instruction word size.
9789 N.B. register args may not be SP. */
9790
9791 /* OK, we start with ubfm, which just needs to pick
9792 some bits out of the source, zero the rest, and write
9793 the result to the dest; two logical shifts suffice. */
9794
9795 /* 32 bit bitfield move, left and right of affected zeroed
9796 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9797 static void
9798 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9799 {
9800 unsigned rd;
9801 unsigned rn = INSTR (9, 5);
9802 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9803
9804 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9805 if (r <= s)
9806 {
9807 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9808 We want only bits s:xxx:r at the bottom of the word
9809 so we LSL bit s up to bit 31 i.e. by 31 - s
9810 and then we LSR to bring bit 31 down to bit s - r
9811 i.e. by 31 + r - s. */
9812 value <<= 31 - s;
9813 value >>= 31 + r - s;
9814 }
9815 else
9816 {
9817 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9818 We want only bits s:xxx:0 starting at bit 31-(r-1)
9819 so we LSL bit s up to bit 31 i.e. by 31 - s
9820 and then we LSL to bring bit 31 down to 31-(r-1)+s
9821 i.e. by r - (s + 1). */
9822 value <<= 31 - s;
9823 value >>= r - (s + 1);
9824 }
9825
9826 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9827 rd = INSTR (4, 0);
9828 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9829 }
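
/* ubfm32 underlies the LSR, LSL and UBFX aliases.  For example
   LSR W0, W1, #5 is UBFM with r = 5, s = 31 and takes the r <= s
   branch:

       value <<= 31 - 31;       -- no-op
       value >>= 31 + 5 - 31;   -- logical right shift by 5

   while LSL W0, W1, #5 is UBFM with r = 27, s = 26 and takes the
   r > s branch, shifting left by 31 - 26 = 5.  */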
9830
9831 /* 64 bit bitfield move, left and right of affected zeroed
9832 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9833 static void
9834 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9835 {
9836 unsigned rd;
9837 unsigned rn = INSTR (9, 5);
9838 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9839
9840 if (r <= s)
9841 {
9842 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9843 We want only bits s:xxx:r at the bottom of the word.
9844 So we LSL bit s up to bit 63 i.e. by 63 - s
9845 and then we LSR to bring bit 63 down to bit s - r
9846 i.e. by 63 + r - s. */
9847 value <<= 63 - s;
9848 value >>= 63 + r - s;
9849 }
9850 else
9851 {
9852 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9853 We want only bits s:xxx:0 starting at bit 63-(r-1).
9854 So we LSL bit s up to bit 63 i.e. by 63 - s
9855 and then we LSL to bring bit 63 down to 63-(r-1)+s
9856 i.e. by r - (s + 1). */
9857 value <<= 63 - s;
9858 value >>= r - (s + 1);
9859 }
9860
9861 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9862 rd = INSTR (4, 0);
9863 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9864 }
9865
9866 /* The signed versions need to insert sign bits
9867 on the left of the inserted bit field, so we do
9868 much the same as the unsigned version except we
9869 use an arithmetic shift right -- this just means
9870 we need to operate on signed values. */
9871
9872 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9873 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9874 static void
9875 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9876 {
9877 unsigned rd;
9878 unsigned rn = INSTR (9, 5);
9879 /* as per ubfm32 but use an ASR instead of an LSR. */
9880 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9881
9882 if (r <= s)
9883 {
9884 value <<= 31 - s;
9885 value >>= 31 + r - s;
9886 }
9887 else
9888 {
9889 value <<= 31 - s;
9890 value >>= r - (s + 1);
9891 }
9892
9893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9894 rd = INSTR (4, 0);
9895 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9896 }
9897
9898 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9899 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9900 static void
9901 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9902 {
9903 unsigned rd;
9904 unsigned rn = INSTR (9, 5);
9905 /* as per ubfm but use an ASR instead of an LSR. */
9906 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9907
9908 if (r <= s)
9909 {
9910 value <<= 63 - s;
9911 value >>= 63 + r - s;
9912 }
9913 else
9914 {
9915 value <<= 63 - s;
9916 value >>= r - (s + 1);
9917 }
9918
9919 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9920 rd = INSTR (4, 0);
9921 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9922 }
9923
9924 /* Finally, these versions leave non-affected bits
9925 as is, so we need to generate the bits as per
9926 ubfm and also generate a mask to pick the
9927 bits from the original and computed values. */
9928
9929 /* 32 bit bitfield move, non-affected bits left as is.
9930 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9931 static void
9932 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9933 {
9934 unsigned rn = INSTR (9, 5);
9935 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9936 uint32_t mask = -1;
9937 unsigned rd;
9938 uint32_t value2;
9939
9940 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9941 if (r <= s)
9942 {
9943 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9944 We want only bits s:xxx:r at the bottom of the word
9945 so we LSL bit s up to bit 31 i.e. by 31 - s
9946 and then we LSR to bring bit 31 down to bit s - r
9947 i.e. by 31 + r - s. */
9948 value <<= 31 - s;
9949 value >>= 31 + r - s;
9950 /* the mask must include the same bits. */
9951 mask <<= 31 - s;
9952 mask >>= 31 + r - s;
9953 }
9954 else
9955 {
9956 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9957 We want only bits s:xxx:0 starting at bit 31-(r-1)
9958 so we LSL bit s up to bit 31 i.e. by 31 - s
9959 and then we LSL to bring bit 31 down to 31-(r-1)+s
9960 i.e. by r - (s + 1). */
9961 value <<= 31 - s;
9962 value >>= r - (s + 1);
9963 /* The mask must include the same bits. */
9964 mask <<= 31 - s;
9965 mask >>= r - (s + 1);
9966 }
9967
9968 rd = INSTR (4, 0);
9969 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9970
9971 value2 &= ~mask;
9972 value2 |= value;
9973
9974 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9975 aarch64_set_reg_u64
9976 (cpu, rd, NO_SP, value2);
9977 }
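
/* bfm32 underlies the BFI and BFXIL aliases.  For example
   BFI W0, W1, #8, #4 is BFM with r = 24, s = 3 (so r > s), moving
   W1<3:0> up to W0<11:8>:

       value <<= 31 - 3;    -- bits 3:0 now at 31:28
       value >>= 24 - 4;    -- and back down to 11:8

   The mask is shifted the same way, so only those four destination
   bits are replaced.  */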
9978
9979 /* 64 bit bitfield move, non-affected bits left as is.
9980 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9981 static void
9982 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9983 {
9984 unsigned rd;
9985 unsigned rn = INSTR (9, 5);
9986 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9987 uint64_t mask = 0xffffffffffffffffULL;
9988
9989 if (r <= s)
9990 {
9991 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9992 We want only bits s:xxx:r at the bottom of the word
9993 so we LSL bit s up to bit 63 i.e. by 63 - s
9994 and then we LSR to bring bit 63 down to bit s - r
9995 i.e. by 63 + r - s. */
9996 value <<= 63 - s;
9997 value >>= 63 + r - s;
9998 /* The mask must include the same bits. */
9999 mask <<= 63 - s;
10000 mask >>= 63 + r - s;
10001 }
10002 else
10003 {
10004 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10005 We want only bits s:xxx:0 starting at bit 63-(r-1)
10006 so we LSL bit s up to bit 63 i.e. by 63 - s
10007 and then we LSL to bring bit 63 down to 63-(r-1)+s
10008 i.e. by r - (s + 1). */
10009 value <<= 63 - s;
10010 value >>= r - (s + 1);
10011 /* The mask must include the same bits. */
10012 mask <<= 63 - s;
10013 mask >>= r - (s + 1);
10014 }
10015
10016 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10017 rd = INSTR (4, 0);
10018 aarch64_set_reg_u64
10019 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10020 }
10021
10022 static void
10023 dexBitfieldImmediate (sim_cpu *cpu)
10024 {
10025 /* assert instr[28:23] = 100110
10026 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10027 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10028 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10029 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10030 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10031 instr[9,5] = Rn
10032 instr[4,0] = Rd */
10033
10034 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10035 uint32_t dispatch;
10036 uint32_t imms;
10037 uint32_t size = INSTR (31, 31);
10038 uint32_t N = INSTR (22, 22);
10039 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10040 /* or else we have an UNALLOC. */
10041 uint32_t immr = INSTR (21, 16);
10042
10043 if (~size & N)
10044 HALT_UNALLOC;
10045
10046 if (!size && uimm (immr, 5, 5))
10047 HALT_UNALLOC;
10048
10049 imms = INSTR (15, 10);
10050 if (!size && uimm (imms, 5, 5))
10051 HALT_UNALLOC;
10052
10053 /* Switch on combined size and op. */
10054 dispatch = INSTR (31, 29);
10055 switch (dispatch)
10056 {
10057 case 0: sbfm32 (cpu, immr, imms); return;
10058 case 1: bfm32 (cpu, immr, imms); return;
10059 case 2: ubfm32 (cpu, immr, imms); return;
10060 case 4: sbfm (cpu, immr, imms); return;
10061 case 5: bfm (cpu, immr, imms); return;
10062 case 6: ubfm (cpu, immr, imms); return;
10063 default: HALT_UNALLOC;
10064 }
10065 }
10066
10067 static void
10068 do_EXTR_32 (sim_cpu *cpu)
10069 {
10070 /* instr[31:21] = 00010011100
10071 instr[20,16] = Rm
10072 instr[15,10] = imms : 0xxxxx for 32 bit
10073 instr[9,5] = Rn
10074 instr[4,0] = Rd */
10075 unsigned rm = INSTR (20, 16);
10076 unsigned imms = INSTR (15, 10) & 31;
10077 unsigned rn = INSTR ( 9, 5);
10078 unsigned rd = INSTR ( 4, 0);
10079 uint64_t val1;
10080 uint64_t val2;
10081
10082 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10083 val1 >>= imms;
10084 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10085 val2 <<= (32 - imms);
10086
10087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10088 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10089 }
10090
10091 static void
10092 do_EXTR_64 (sim_cpu *cpu)
10093 {
10094 /* instr[31:21] = 10010011100
10095 instr[20,16] = Rm
10096 instr[15,10] = imms
10097 instr[9,5] = Rn
10098 instr[4,0] = Rd */
10099 unsigned rm = INSTR (20, 16);
10100 unsigned imms = INSTR (15, 10) & 63;
10101 unsigned rn = INSTR ( 9, 5);
10102 unsigned rd = INSTR ( 4, 0);
10103 uint64_t val;
10104
10105 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10106 val >>= imms;
10107 if (imms) val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10108
10109 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10110 }
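
/* EXTR pulls 64 bits out of the Rn:Rm concatenation, starting at
   bit imms.  With Rn == Rm it is the ROR alias; for example with
   x1 = 0x00000000deadbeef:

       ror x0, x1, #16    -- EXTR x0, x1, x1, #16
                          -- x0 = 0xbeef00000000dead

   i.e. val = (x1 >> 16) | (x1 << 48) in the code above.  */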
10111
10112 static void
10113 dexExtractImmediate (sim_cpu *cpu)
10114 {
10115 /* assert instr[28:23] = 100111
10116 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10117 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10118 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10119 instr[21] = op0 : must be 0 or UNALLOC
10120 instr[20,16] = Rm
10121 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10122 instr[9,5] = Rn
10123 instr[4,0] = Rd */
10124
10125 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10126 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10127 uint32_t dispatch;
10128 uint32_t size = INSTR (31, 31);
10129 uint32_t N = INSTR (22, 22);
10130 /* 32 bit operations must have imms[5] = 0
10131 or else we have an UNALLOC. */
10132 uint32_t imms = INSTR (15, 10);
10133
10134 if (size ^ N)
10135 HALT_UNALLOC;
10136
10137 if (!size && uimm (imms, 5, 5))
10138 HALT_UNALLOC;
10139
10140 /* Switch on combined size and op. */
10141 dispatch = INSTR (31, 29);
10142
10143 if (dispatch == 0)
10144 do_EXTR_32 (cpu);
10145
10146 else if (dispatch == 4)
10147 do_EXTR_64 (cpu);
10148
10149 else if (dispatch == 1)
10150 HALT_NYI;
10151 else
10152 HALT_UNALLOC;
10153 }
10154
10155 static void
10156 dexDPImm (sim_cpu *cpu)
10157 {
10158 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10159 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10160 bits [25,23] of a DPImm are the secondary dispatch vector. */
10161 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10162
10163 switch (group2)
10164 {
10165 case DPIMM_PCADR_000:
10166 case DPIMM_PCADR_001:
10167 dexPCRelAddressing (cpu);
10168 return;
10169
10170 case DPIMM_ADDSUB_010:
10171 case DPIMM_ADDSUB_011:
10172 dexAddSubtractImmediate (cpu);
10173 return;
10174
10175 case DPIMM_LOG_100:
10176 dexLogicalImmediate (cpu);
10177 return;
10178
10179 case DPIMM_MOV_101:
10180 dexMoveWideImmediate (cpu);
10181 return;
10182
10183 case DPIMM_BITF_110:
10184 dexBitfieldImmediate (cpu);
10185 return;
10186
10187 case DPIMM_EXTR_111:
10188 dexExtractImmediate (cpu);
10189 return;
10190
10191 default:
10192 /* Should never reach here. */
10193 HALT_NYI;
10194 }
10195 }
10196
10197 static void
10198 dexLoadUnscaledImmediate (sim_cpu *cpu)
10199 {
10200 /* instr[29,24] == 111_00
10201 instr[21] == 0
10202 instr[11,10] == 00
10203 instr[31,30] = size
10204 instr[26] = V
10205 instr[23,22] = opc
10206 instr[20,12] = simm9
10207 instr[9,5] = rn may be SP. */
10208 /* unsigned rt = INSTR (4, 0); */
10209 uint32_t V = INSTR (26, 26);
10210 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10211 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10212
10213 if (!V)
10214 {
10215 /* GReg operations. */
10216 switch (dispatch)
10217 {
10218 case 0: sturb (cpu, imm); return;
10219 case 1: ldurb32 (cpu, imm); return;
10220 case 2: ldursb64 (cpu, imm); return;
10221 case 3: ldursb32 (cpu, imm); return;
10222 case 4: sturh (cpu, imm); return;
10223 case 5: ldurh32 (cpu, imm); return;
10224 case 6: ldursh64 (cpu, imm); return;
10225 case 7: ldursh32 (cpu, imm); return;
10226 case 8: stur32 (cpu, imm); return;
10227 case 9: ldur32 (cpu, imm); return;
10228 case 10: ldursw (cpu, imm); return;
10229 case 12: stur64 (cpu, imm); return;
10230 case 13: ldur64 (cpu, imm); return;
10231
10232 case 14:
10233 /* PRFUM NYI. */
10234 HALT_NYI;
10235
10236 default:
10237 case 11:
10238 case 15:
10239 HALT_UNALLOC;
10240 }
10241 }
10242
10243 /* FReg operations. */
10244 switch (dispatch)
10245 {
10246 case 2: fsturq (cpu, imm); return;
10247 case 3: fldurq (cpu, imm); return;
10248 case 8: fsturs (cpu, imm); return;
10249 case 9: fldurs (cpu, imm); return;
10250 case 12: fsturd (cpu, imm); return;
10251 case 13: fldurd (cpu, imm); return;
10252
10253 case 0: /* STUR 8 bit FP. */
10254 case 1: /* LDUR 8 bit FP. */
10255 case 4: /* STUR 16 bit FP. */
10256 case 5: /* LDUR 16 bit FP. */
10257 HALT_NYI;
10258
10259 default:
10260 case 6:
10261 case 7:
10262 case 10:
10263 case 11:
10264 case 14:
10265 case 15:
10266 HALT_UNALLOC;
10267 }
10268 }
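
/* An example of the dispatch computation above: LDUR X1, [SP, #-8]
   has size = 11, V = 0 and opc = 01, so

       dispatch = (3 << 2) | 1;    -- 13 ==> ldur64

   with imm holding the sign-extended 9-bit offset -8 taken from
   instr[20,12].  */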
10269
10270 /* N.B. A preliminary note regarding all the ldrs<x>32
10271 instructions
10272
10273 The signed value loaded by these instructions is cast to unsigned
10274 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10275 64 bit element of the GReg union. This performs a 32 bit sign extension
10276 (as required) but avoids 64 bit sign extension, thus ensuring that the
10277 top half of the register word is zero. This is what the spec demands
10278 when a 32 bit load occurs. */
10279
10280 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10281 static void
10282 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10283 {
10284 unsigned int rn = INSTR (9, 5);
10285 unsigned int rt = INSTR (4, 0);
10286
10287 /* The target register may not be SP but the source may be;
10288 there is no scaling required for a byte load. */
10289 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10290 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10291 (int64_t) aarch64_get_mem_s8 (cpu, address));
10292 }
10293
10294 /* 32 bit load sign-extended byte scaled or unscaled zero-
10295 or sign-extended 32-bit register offset. */
10296 static void
10297 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10298 {
10299 unsigned int rm = INSTR (20, 16);
10300 unsigned int rn = INSTR (9, 5);
10301 unsigned int rt = INSTR (4, 0);
10302
10303 /* rn may reference SP, rm and rt must reference ZR. */
10304
10305 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10306 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10307 extension);
10308
10309 /* There is no scaling required for a byte load. */
10310 aarch64_set_reg_u64
10311 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10312 + displacement));
10313 }
10314
10315 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10316 pre- or post-writeback. */
10317 static void
10318 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10319 {
10320 uint64_t address;
10321 unsigned int rn = INSTR (9, 5);
10322 unsigned int rt = INSTR (4, 0);
10323
10324 if (rn == rt && wb != NoWriteBack)
10325 HALT_UNALLOC;
10326
10327 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10328
10329 if (wb == Pre)
10330 address += offset;
10331
10332 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10333 (int64_t) aarch64_get_mem_s8 (cpu, address));
10334
10335 if (wb == Post)
10336 address += offset;
10337
10338 if (wb != NoWriteBack)
10339 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10340 }
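
/* The writeback handling above distinguishes the two indexed forms.
   For example, with x2 = 0x1000:

       ldrsb w0, [x2, #4]!    -- Pre:  load from 0x1004, x2 = 0x1004
       ldrsb w0, [x2], #4     -- Post: load from 0x1000, x2 = 0x1004

   The rn == rt case with writeback is rejected up front, since the
   loaded value and the updated base would collide.  */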
10341
10342 /* 8 bit store scaled. */
10343 static void
10344 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10345 {
10346 unsigned st = INSTR (4, 0);
10347 unsigned rn = INSTR (9, 5);
10348
10349 aarch64_set_mem_u8 (cpu,
10350 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10351 aarch64_get_vec_u8 (cpu, st, 0));
10352 }
10353
10354 /* 8 bit store scaled or unscaled zero- or
10355 sign-extended 8-bit register offset. */
10356 static void
10357 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10358 {
10359 unsigned rm = INSTR (20, 16);
10360 unsigned rn = INSTR (9, 5);
10361 unsigned st = INSTR (4, 0);
10362
10363 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10364 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10365 extension);
10366 uint64_t displacement = extended; /* No scaling for a byte access. */
10367
10368 aarch64_set_mem_u8
10369 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10370 }
10371
10372 /* 16 bit store scaled. */
10373 static void
10374 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10375 {
10376 unsigned st = INSTR (4, 0);
10377 unsigned rn = INSTR (9, 5);
10378
10379 aarch64_set_mem_u16
10380 (cpu,
10381 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10382 aarch64_get_vec_u16 (cpu, st, 0));
10383 }
10384
10385 /* 16 bit store scaled or unscaled zero-
10386 or sign-extended 16-bit register offset. */
10387 static void
10388 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10389 {
10390 unsigned rm = INSTR (20, 16);
10391 unsigned rn = INSTR (9, 5);
10392 unsigned st = INSTR (4, 0);
10393
10394 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10395 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10396 extension);
10397 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10398
10399 aarch64_set_mem_u16
10400 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10401 }
10402
10403 /* 32 bit store scaled unsigned 12 bit. */
10404 static void
10405 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10406 {
10407 unsigned st = INSTR (4, 0);
10408 unsigned rn = INSTR (9, 5);
10409
10410 aarch64_set_mem_u32
10411 (cpu,
10412 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10413 aarch64_get_vec_u32 (cpu, st, 0));
10414 }
10415
10416 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10417 static void
10418 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10419 {
10420 unsigned rn = INSTR (9, 5);
10421 unsigned st = INSTR (4, 0);
10422
10423 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10424
10425 if (wb != Post)
10426 address += offset;
10427
10428 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10429
10430 if (wb == Post)
10431 address += offset;
10432
10433 if (wb != NoWriteBack)
10434 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10435 }
10436
10437 /* 32 bit store scaled or unscaled zero-
10438 or sign-extended 32-bit register offset. */
10439 static void
10440 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10441 {
10442 unsigned rm = INSTR (20, 16);
10443 unsigned rn = INSTR (9, 5);
10444 unsigned st = INSTR (4, 0);
10445
10446 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10447 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10448 extension);
10449 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10450
10451 aarch64_set_mem_u32
10452 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10453 }
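/* For illustration (an editorial addition): with scaling == Scaled and a
   32-bit transfer, OPT_SCALE shifts the extended index left by two, so
   the store above matches, e.g., STR S0, [X1, X2, LSL #2]; with Unscaled
   the index is used unshifted.  */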
10454
10455 /* 64 bit store scaled unsigned 12 bit. */
10456 static void
10457 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10458 {
10459 unsigned st = INSTR (4, 0);
10460 unsigned rn = INSTR (9, 5);
10461
10462 aarch64_set_mem_u64
10463 (cpu,
10464 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10465 aarch64_get_vec_u64 (cpu, st, 0));
10466 }
10467
10468 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10469 static void
10470 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10471 {
10472 unsigned rn = INSTR (9, 5);
10473 unsigned st = INSTR (4, 0);
10474
10475 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10476
10477 if (wb != Post)
10478 address += offset;
10479
10480 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10481
10482 if (wb == Post)
10483 address += offset;
10484
10485 if (wb != NoWriteBack)
10486 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10487 }
10488
10489 /* 64 bit store scaled or unscaled zero-
10490 or sign-extended 32-bit register offset. */
10491 static void
10492 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10493 {
10494 unsigned rm = INSTR (20, 16);
10495 unsigned rn = INSTR (9, 5);
10496 unsigned st = INSTR (4, 0);
10497
10498 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10499 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10500 extension);
10501 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10502
10503 aarch64_set_mem_u64
10504 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10505 }
10506
10507 /* 128 bit store scaled unsigned 12 bit. */
10508 static void
10509 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10510 {
10511 FRegister a;
10512 unsigned st = INSTR (4, 0);
10513 unsigned rn = INSTR (9, 5);
10514 uint64_t addr;
10515
10516 aarch64_get_FP_long_double (cpu, st, & a);
10517
10518 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10519 aarch64_set_mem_long_double (cpu, addr, a);
10520 }
10521
10522 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10523 static void
10524 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10525 {
10526 FRegister a;
10527 unsigned rn = INSTR (9, 5);
10528 unsigned st = INSTR (4, 0);
10529 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10530
10531 if (wb != Post)
10532 address += offset;
10533
10534 aarch64_get_FP_long_double (cpu, st, & a);
10535 aarch64_set_mem_long_double (cpu, address, a);
10536
10537 if (wb == Post)
10538 address += offset;
10539
10540 if (wb != NoWriteBack)
10541 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10542 }
10543
10544 /* 128 bit store scaled or unscaled zero-
10545 or sign-extended 32-bit register offset. */
10546 static void
10547 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10548 {
10549 unsigned rm = INSTR (20, 16);
10550 unsigned rn = INSTR (9, 5);
10551 unsigned st = INSTR (4, 0);
10552
10553 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10554 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10555 extension);
10556 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10557
10558 FRegister a;
10559
10560 aarch64_get_FP_long_double (cpu, st, & a);
10561 aarch64_set_mem_long_double (cpu, address + displacement, a);
10562 }
10563
10564 static void
10565 dexLoadImmediatePrePost (sim_cpu *cpu)
10566 {
10567 /* instr[31,30] = size
10568 instr[29,27] = 111
10569 instr[26] = V
10570 instr[25,24] = 00
10571 instr[23,22] = opc
10572 instr[21] = 0
10573 instr[20,12] = simm9
10574 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10575 instr[10] = 1
10576 instr[9,5] = Rn may be SP.
10577 instr[4,0] = Rt */
10578
10579 uint32_t V = INSTR (26, 26);
10580 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10581 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10582 WriteBack wb = INSTR (11, 11);
10583
10584 if (!V)
10585 {
10586 /* GReg operations. */
10587 switch (dispatch)
10588 {
10589 case 0: strb_wb (cpu, imm, wb); return;
10590 case 1: ldrb32_wb (cpu, imm, wb); return;
10591 case 2: ldrsb_wb (cpu, imm, wb); return;
10592 case 3: ldrsb32_wb (cpu, imm, wb); return;
10593 case 4: strh_wb (cpu, imm, wb); return;
10594 case 5: ldrh32_wb (cpu, imm, wb); return;
10595 case 6: ldrsh64_wb (cpu, imm, wb); return;
10596 case 7: ldrsh32_wb (cpu, imm, wb); return;
10597 case 8: str32_wb (cpu, imm, wb); return;
10598 case 9: ldr32_wb (cpu, imm, wb); return;
10599 case 10: ldrsw_wb (cpu, imm, wb); return;
10600 case 12: str_wb (cpu, imm, wb); return;
10601 case 13: ldr_wb (cpu, imm, wb); return;
10602
10603 default:
10604 case 11:
10605 case 14:
10606 case 15:
10607 HALT_UNALLOC;
10608 }
10609 }
10610
10611 /* FReg operations. */
10612 switch (dispatch)
10613 {
10614 case 2: fstrq_wb (cpu, imm, wb); return;
10615 case 3: fldrq_wb (cpu, imm, wb); return;
10616 case 8: fstrs_wb (cpu, imm, wb); return;
10617 case 9: fldrs_wb (cpu, imm, wb); return;
10618 case 12: fstrd_wb (cpu, imm, wb); return;
10619 case 13: fldrd_wb (cpu, imm, wb); return;
10620
10621 case 0: /* STUR 8 bit FP. */
10622 case 1: /* LDUR 8 bit FP. */
10623 case 4: /* STUR 16 bit FP. */
10624 case 5: /* LDUR 16 bit FP. */
10625 HALT_NYI;
10626
10627 default:
10628 case 6:
10629 case 7:
10630 case 10:
10631 case 11:
10632 case 14:
10633 case 15:
10634 HALT_UNALLOC;
10635 }
10636 }
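/* A worked example (editorial addition, not from the original source):
   the word 0xf8410441 is LDR X1, [X2], #16.  size = 0b11 and opc = 0b01
   give dispatch = (3 << 2) | 1 = 13, selecting ldr_wb; instr[11] = 0
   selects Post and simm9 = 16, so X1 is loaded from [X2] and X2 is
   incremented by 16 after the access.  */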
10637
10638 static void
10639 dexLoadRegisterOffset (sim_cpu *cpu)
10640 {
10641 /* instr[31,30] = size
10642 instr[29,27] = 111
10643 instr[26] = V
10644 instr[25,24] = 00
10645 instr[23,22] = opc
10646 instr[21] = 1
10647 instr[20,16] = rm
10648 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10649 110 ==> SXTW, 111 ==> SXTX,
10650 ow ==> RESERVED
10651 instr[12] = scaled
10652 instr[11,10] = 10
10653 instr[9,5] = rn
10654 instr[4,0] = rt. */
10655
10656 uint32_t V = INSTR (26, 26);
10657 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10658 Scaling scale = INSTR (12, 12);
10659 Extension extensionType = INSTR (15, 13);
10660
10661 /* Check for illegal extension types. */
10662 if (uimm (extensionType, 1, 1) == 0)
10663 HALT_UNALLOC;
10664
10665 if (extensionType == UXTX || extensionType == SXTX)
10666 extensionType = NoExtension;
10667
10668 if (!V)
10669 {
10670 /* GReg operations. */
10671 switch (dispatch)
10672 {
10673 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10674 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10675 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10676 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10677 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10678 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10679 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10680 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10681 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10682 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10683 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10684 case 12: str_scale_ext (cpu, scale, extensionType); return;
10685 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10686 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10687
10688 default:
10689 case 11:
10690 case 15:
10691 HALT_UNALLOC;
10692 }
10693 }
10694
10695 /* FReg operations. */
10696 switch (dispatch)
10697 {
10698 case 1: /* LDR 8 bit FP. */
10699 HALT_NYI;
10700 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10701 case 5: /* LDR 16 bit FP. */
10702 HALT_NYI;
10703 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10704 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10705
10706 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10707 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10708 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10709 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10710 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10711
10712 default:
10713 case 6:
10714 case 7:
10715 case 10:
10716 case 11:
10717 case 14:
10718 case 15:
10719 HALT_UNALLOC;
10720 }
10721 }
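/* A worked example (editorial addition): 0xf8627820 is
   LDR X0, [X1, X2, LSL #3].  size = 0b11 and opc = 0b01 give dispatch
   13 (ldr_scale_ext); option = 0b011 (UXTX/LSL) is mapped to
   NoExtension above, and instr[12] = 1 selects Scaled, so the
   displacement is X2 shifted left by three.  */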
10722
10723 static void
10724 dexLoadUnsignedImmediate (sim_cpu *cpu)
10725 {
10726 /* instr[29,24] == 111_01
10727 instr[31,30] = size
10728 instr[26] = V
10729 instr[23,22] = opc
10730 instr[21,10] = uimm12 : unsigned immediate offset
10731 instr[9,5] = rn may be SP.
10732 instr[4,0] = rt. */
10733
10734 uint32_t V = INSTR (26,26);
10735 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10736 uint32_t imm = INSTR (21, 10);
10737
10738 if (!V)
10739 {
10740 /* GReg operations. */
10741 switch (dispatch)
10742 {
10743 case 0: strb_abs (cpu, imm); return;
10744 case 1: ldrb32_abs (cpu, imm); return;
10745 case 2: ldrsb_abs (cpu, imm); return;
10746 case 3: ldrsb32_abs (cpu, imm); return;
10747 case 4: strh_abs (cpu, imm); return;
10748 case 5: ldrh32_abs (cpu, imm); return;
10749 case 6: ldrsh_abs (cpu, imm); return;
10750 case 7: ldrsh32_abs (cpu, imm); return;
10751 case 8: str32_abs (cpu, imm); return;
10752 case 9: ldr32_abs (cpu, imm); return;
10753 case 10: ldrsw_abs (cpu, imm); return;
10754 case 12: str_abs (cpu, imm); return;
10755 case 13: ldr_abs (cpu, imm); return;
10756 case 14: prfm_abs (cpu, imm); return;
10757
10758 default:
10759 case 11:
10760 case 15:
10761 HALT_UNALLOC;
10762 }
10763 }
10764
10765 /* FReg operations. */
10766 switch (dispatch)
10767 {
10768 case 0: fstrb_abs (cpu, imm); return;
10769 case 4: fstrh_abs (cpu, imm); return;
10770 case 8: fstrs_abs (cpu, imm); return;
10771 case 12: fstrd_abs (cpu, imm); return;
10772 case 2: fstrq_abs (cpu, imm); return;
10773
10774 case 1: fldrb_abs (cpu, imm); return;
10775 case 5: fldrh_abs (cpu, imm); return;
10776 case 9: fldrs_abs (cpu, imm); return;
10777 case 13: fldrd_abs (cpu, imm); return;
10778 case 3: fldrq_abs (cpu, imm); return;
10779
10780 default:
10781 case 6:
10782 case 7:
10783 case 10:
10784 case 11:
10785 case 14:
10786 case 15:
10787 HALT_UNALLOC;
10788 }
10789 }
10790
10791 static void
10792 dexLoadExclusive (sim_cpu *cpu)
10793 {
10794 /* assert instr[29:24] = 001000;
10795 instr[31,30] = size
10796 instr[23] = 0 if exclusive
10797 instr[22] = L : 1 if load, 0 if store
10798 instr[21] = 1 if pair
10799 instr[20,16] = Rs
10800 instr[15] = o0 : 1 if ordered
10801 instr[14,10] = Rt2
10802 instr[9,5] = Rn
10803 instr[4,0] = Rt. */
10804
10805 switch (INSTR (22, 21))
10806 {
10807 case 2: ldxr (cpu); return;
10808 case 0: stxr (cpu); return;
10809 default: HALT_NYI;
10810 }
10811 }
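/* Editorial note on typical guest usage: an atomic increment spins on
   the exclusive pair, e.g.

     retry: ldxr  x0, [x1]
            add   x0, x0, #1
            stxr  w2, x0, [x1]
            cbnz  w2, retry

   where W2 receives 0 on success and 1 if the exclusive monitor was
   lost.  */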
10812
10813 static void
10814 dexLoadOther (sim_cpu *cpu)
10815 {
10816 uint32_t dispatch;
10817
10818 /* instr[29,25] = 111_0
10819 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10820 instr[21:11,10] is the secondary dispatch. */
10821 if (INSTR (24, 24))
10822 {
10823 dexLoadUnsignedImmediate (cpu);
10824 return;
10825 }
10826
10827 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10828 switch (dispatch)
10829 {
10830 case 0: dexLoadUnscaledImmediate (cpu); return;
10831 case 1: dexLoadImmediatePrePost (cpu); return;
10832 case 3: dexLoadImmediatePrePost (cpu); return;
10833 case 6: dexLoadRegisterOffset (cpu); return;
10834
10835 default:
10836 case 2:
10837 case 4:
10838 case 5:
10839 case 7:
10840 HALT_NYI;
10841 }
10842 }
10843
10844 static void
10845 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10846 {
10847 unsigned rn = INSTR (14, 10);
10848 unsigned rd = INSTR (9, 5);
10849 unsigned rm = INSTR (4, 0);
10850 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10851
10852 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10853 HALT_UNALLOC; /* Writeback with the base equal to Rt or Rt2 is CONSTRAINED UNPREDICTABLE. */
10854
10855 offset <<= 2;
10856
10857 if (wb != Post)
10858 address += offset;
10859
10860 aarch64_set_mem_u32 (cpu, address,
10861 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10862 aarch64_set_mem_u32 (cpu, address + 4,
10863 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10864
10865 if (wb == Post)
10866 address += offset;
10867
10868 if (wb != NoWriteBack)
10869 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10870 }
10871
10872 static void
10873 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10874 {
10875 unsigned rn = INSTR (14, 10);
10876 unsigned rd = INSTR (9, 5);
10877 unsigned rm = INSTR (4, 0);
10878 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10879
10880 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10881 HALT_UNALLOC; /* Writeback with the base equal to Rt or Rt2 is CONSTRAINED UNPREDICTABLE. */
10882
10883 offset <<= 3;
10884
10885 if (wb != Post)
10886 address += offset;
10887
10888 aarch64_set_mem_u64 (cpu, address,
10889 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10890 aarch64_set_mem_u64 (cpu, address + 8,
10891 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10892
10893 if (wb == Post)
10894 address += offset;
10895
10896 if (wb != NoWriteBack)
10897 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10898 }
10899
10900 static void
10901 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10902 {
10903 unsigned rn = INSTR (14, 10);
10904 unsigned rd = INSTR (9, 5);
10905 unsigned rm = INSTR (4, 0);
10906 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10907
10908 /* LDP with Rt == Rt2 is UNPREDICTABLE, so treat it as unallocated. */
10909 if (rn == rm)
10910 HALT_UNALLOC;
10911
10912 offset <<= 2;
10913
10914 if (wb != Post)
10915 address += offset;
10916
10917 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10918 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10919
10920 if (wb == Post)
10921 address += offset;
10922
10923 if (wb != NoWriteBack)
10924 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10925 }
10926
10927 static void
10928 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10929 {
10930 unsigned rn = INSTR (14, 10);
10931 unsigned rd = INSTR (9, 5);
10932 unsigned rm = INSTR (4, 0);
10933 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10934
10935 /* LDP with Rt == Rt2 is UNPREDICTABLE, so treat it as unallocated. */
10936 if (rn == rm)
10937 HALT_UNALLOC;
10938
10939 offset <<= 2;
10940
10941 if (wb != Post)
10942 address += offset;
10943
10944 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10945 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10946
10947 if (wb == Post)
10948 address += offset;
10949
10950 if (wb != NoWriteBack)
10951 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10952 }
10953
10954 static void
10955 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10956 {
10957 unsigned rn = INSTR (14, 10);
10958 unsigned rd = INSTR (9, 5);
10959 unsigned rm = INSTR (4, 0);
10960 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10961
10962 /* LDP with Rt == Rt2 is UNPREDICTABLE, so treat it as unallocated. */
10963 if (rn == rm)
10964 HALT_UNALLOC;
10965
10966 offset <<= 3;
10967
10968 if (wb != Post)
10969 address += offset;
10970
10971 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10972 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10973
10974 if (wb == Post)
10975 address += offset;
10976
10977 if (wb != NoWriteBack)
10978 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10979 }
10980
10981 static void
10982 dex_load_store_pair_gr (sim_cpu *cpu)
10983 {
10984 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10985 instr[29,25] = instruction encoding: 101_0
10986 instr[26] = V : 1 if fp, 0 if gp
10987 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10988 instr[22] = load/store (1=> load)
10989 instr[21,15] = signed, scaled, offset
10990 instr[14,10] = Rn
10991 instr[ 9, 5] = Rd
10992 instr[ 4, 0] = Rm. */
10993
10994 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10995 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10996
10997 switch (dispatch)
10998 {
10999 case 2: store_pair_u32 (cpu, offset, Post); return;
11000 case 3: load_pair_u32 (cpu, offset, Post); return;
11001 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11002 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11003 case 6: store_pair_u32 (cpu, offset, Pre); return;
11004 case 7: load_pair_u32 (cpu, offset, Pre); return;
11005
11006 case 11: load_pair_s32 (cpu, offset, Post); return;
11007 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11008 case 15: load_pair_s32 (cpu, offset, Pre); return;
11009
11010 case 18: store_pair_u64 (cpu, offset, Post); return;
11011 case 19: load_pair_u64 (cpu, offset, Post); return;
11012 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11013 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11014 case 22: store_pair_u64 (cpu, offset, Pre); return;
11015 case 23: load_pair_u64 (cpu, offset, Pre); return;
11016
11017 default:
11018 HALT_UNALLOC;
11019 }
11020 }
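/* A worked example (editorial addition): the common prologue word
   0xa9bf07e0 is STP X0, X1, [SP, #-16]!.  size = 0b10 and mode:L = 0b110
   give dispatch = (2 << 3) | 6 = 22 (store_pair_u64 with Pre); the
   simm7 field holds -2, scaled by 8 to -16, so SP is dropped by 16
   before X0 and X1 are stored.  */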
11021
11022 static void
11023 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11024 {
11025 unsigned rn = INSTR (14, 10);
11026 unsigned rd = INSTR (9, 5);
11027 unsigned rm = INSTR (4, 0);
11028 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11029
11030 offset <<= 2;
11031
11032 if (wb != Post)
11033 address += offset;
11034
11035 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11036 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11037
11038 if (wb == Post)
11039 address += offset;
11040
11041 if (wb != NoWriteBack)
11042 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11043 }
11044
11045 static void
11046 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11047 {
11048 unsigned rn = INSTR (14, 10);
11049 unsigned rd = INSTR (9, 5);
11050 unsigned rm = INSTR (4, 0);
11051 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11052
11053 offset <<= 3;
11054
11055 if (wb != Post)
11056 address += offset;
11057
11058 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11059 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11060
11061 if (wb == Post)
11062 address += offset;
11063
11064 if (wb != NoWriteBack)
11065 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11066 }
11067
11068 static void
11069 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11070 {
11071 FRegister a;
11072 unsigned rn = INSTR (14, 10);
11073 unsigned rd = INSTR (9, 5);
11074 unsigned rm = INSTR (4, 0);
11075 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11076
11077 offset <<= 4;
11078
11079 if (wb != Post)
11080 address += offset;
11081
11082 aarch64_get_FP_long_double (cpu, rm, & a);
11083 aarch64_set_mem_long_double (cpu, address, a);
11084 aarch64_get_FP_long_double (cpu, rn, & a);
11085 aarch64_set_mem_long_double (cpu, address + 16, a);
11086
11087 if (wb == Post)
11088 address += offset;
11089
11090 if (wb != NoWriteBack)
11091 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11092 }
11093
11094 static void
11095 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11096 {
11097 unsigned rn = INSTR (14, 10);
11098 unsigned rd = INSTR (9, 5);
11099 unsigned rm = INSTR (4, 0);
11100 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11101
11102 if (rm == rn)
11103 HALT_UNALLOC;
11104
11105 offset <<= 2;
11106
11107 if (wb != Post)
11108 address += offset;
11109
11110 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11111 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11112
11113 if (wb == Post)
11114 address += offset;
11115
11116 if (wb != NoWriteBack)
11117 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11118 }
11119
11120 static void
11121 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11122 {
11123 unsigned rn = INSTR (14, 10);
11124 unsigned rd = INSTR (9, 5);
11125 unsigned rm = INSTR (4, 0);
11126 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11127
11128 if (rm == rn)
11129 HALT_UNALLOC;
11130
11131 offset <<= 3;
11132
11133 if (wb != Post)
11134 address += offset;
11135
11136 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11137 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11138
11139 if (wb == Post)
11140 address += offset;
11141
11142 if (wb != NoWriteBack)
11143 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11144 }
11145
11146 static void
11147 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11148 {
11149 FRegister a;
11150 unsigned rn = INSTR (14, 10);
11151 unsigned rd = INSTR (9, 5);
11152 unsigned rm = INSTR (4, 0);
11153 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11154
11155 if (rm == rn)
11156 HALT_UNALLOC;
11157
11158 offset <<= 4;
11159
11160 if (wb != Post)
11161 address += offset;
11162
11163 aarch64_get_mem_long_double (cpu, address, & a);
11164 aarch64_set_FP_long_double (cpu, rm, a);
11165 aarch64_get_mem_long_double (cpu, address + 16, & a);
11166 aarch64_set_FP_long_double (cpu, rn, a);
11167
11168 if (wb == Post)
11169 address += offset;
11170
11171 if (wb != NoWriteBack)
11172 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11173 }
11174
11175 static void
11176 dex_load_store_pair_fp (sim_cpu *cpu)
11177 {
11178 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11179 instr[29,25] = instruction encoding
11180 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11181 instr[22] = load/store (1=> load)
11182 instr[21,15] = signed, scaled, offset
11183 instr[14,10] = Rn
11184 instr[ 9, 5] = Rd
11185 instr[ 4, 0] = Rm */
11186
11187 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11188 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11189
11190 switch (dispatch)
11191 {
11192 case 2: store_pair_float (cpu, offset, Post); return;
11193 case 3: load_pair_float (cpu, offset, Post); return;
11194 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11195 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11196 case 6: store_pair_float (cpu, offset, Pre); return;
11197 case 7: load_pair_float (cpu, offset, Pre); return;
11198
11199 case 10: store_pair_double (cpu, offset, Post); return;
11200 case 11: load_pair_double (cpu, offset, Post); return;
11201 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11202 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11203 case 14: store_pair_double (cpu, offset, Pre); return;
11204 case 15: load_pair_double (cpu, offset, Pre); return;
11205
11206 case 18: store_pair_long_double (cpu, offset, Post); return;
11207 case 19: load_pair_long_double (cpu, offset, Post); return;
11208 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11209 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11210 case 22: store_pair_long_double (cpu, offset, Pre); return;
11211 case 23: load_pair_long_double (cpu, offset, Pre); return;
11212
11213 default:
11214 HALT_UNALLOC;
11215 }
11216 }
11217
11218 static inline unsigned
11219 vec_reg (unsigned v, unsigned o)
11220 {
11221 return (v + o) & 0x1F; /* Register numbers wrap modulo 32. */
11222 }
11223
11224 /* Load multiple N-element structures to N consecutive registers. */
11225 static void
11226 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11227 {
11228 int all = INSTR (30, 30);
11229 unsigned size = INSTR (11, 10);
11230 unsigned vd = INSTR (4, 0);
11231 unsigned i;
11232
11233 switch (size)
11234 {
11235 case 0: /* 8-bit operations. */
11236 if (all)
11237 for (i = 0; i < (16 * N); i++)
11238 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11239 aarch64_get_mem_u8 (cpu, address + i));
11240 else
11241 for (i = 0; i < (8 * N); i++)
11242 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11243 aarch64_get_mem_u8 (cpu, address + i));
11244 return;
11245
11246 case 1: /* 16-bit operations. */
11247 if (all)
11248 for (i = 0; i < (8 * N); i++)
11249 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11250 aarch64_get_mem_u16 (cpu, address + i * 2));
11251 else
11252 for (i = 0; i < (4 * N); i++)
11253 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11254 aarch64_get_mem_u16 (cpu, address + i * 2));
11255 return;
11256
11257 case 2: /* 32-bit operations. */
11258 if (all)
11259 for (i = 0; i < (4 * N); i++)
11260 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11261 aarch64_get_mem_u32 (cpu, address + i * 4));
11262 else
11263 for (i = 0; i < (2 * N); i++)
11264 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11265 aarch64_get_mem_u32 (cpu, address + i * 4));
11266 return;
11267
11268 case 3: /* 64-bit operations. */
11269 if (all)
11270 for (i = 0; i < (2 * N); i++)
11271 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11272 aarch64_get_mem_u64 (cpu, address + i * 8));
11273 else
11274 for (i = 0; i < N; i++)
11275 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11276 aarch64_get_mem_u64 (cpu, address + i * 8));
11277 return;
11278 }
11279 }
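/* Editorial note: the loops above perform a plain consecutive fill,
   which matches LD1 (multiple structures).  A faithful LD2 would
   de-interleave instead; for bytes that would look something like this
   sketch (an assumption about the fix, not current behaviour):

     for (i = 0; i < (all ? 16 : 8); i++)
       for (n = 0; n < 2; n++)
         aarch64_set_vec_u8 (cpu, vec_reg (vd, n), i,
                             aarch64_get_mem_u8 (cpu, address + i * 2 + n));
*/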
11280
11281 /* LD4: load multiple 4-element to four consecutive registers. */
11282 static void
11283 LD4 (sim_cpu *cpu, uint64_t address)
11284 {
11285 vec_load (cpu, address, 4);
11286 }
11287
11288 /* LD3: load multiple 3-element structures to three consecutive registers. */
11289 static void
11290 LD3 (sim_cpu *cpu, uint64_t address)
11291 {
11292 vec_load (cpu, address, 3);
11293 }
11294
11295 /* LD2: load multiple 2-element structures to two consecutive registers. */
11296 static void
11297 LD2 (sim_cpu *cpu, uint64_t address)
11298 {
11299 vec_load (cpu, address, 2);
11300 }
11301
11302 /* Load multiple 1-element structures into one register. */
11303 static void
11304 LD1_1 (sim_cpu *cpu, uint64_t address)
11305 {
11306 int all = INSTR (30, 30);
11307 unsigned size = INSTR (11, 10);
11308 unsigned vd = INSTR (4, 0);
11309 unsigned i;
11310
11311 switch (size)
11312 {
11313 case 0:
11314 /* LD1 {Vd.16b}, addr, #16 */
11315 /* LD1 {Vd.8b}, addr, #8 */
11316 for (i = 0; i < (all ? 16 : 8); i++)
11317 aarch64_set_vec_u8 (cpu, vd, i,
11318 aarch64_get_mem_u8 (cpu, address + i));
11319 return;
11320
11321 case 1:
11322 /* LD1 {Vd.8h}, addr, #16 */
11323 /* LD1 {Vd.4h}, addr, #8 */
11324 for (i = 0; i < (all ? 8 : 4); i++)
11325 aarch64_set_vec_u16 (cpu, vd, i,
11326 aarch64_get_mem_u16 (cpu, address + i * 2));
11327 return;
11328
11329 case 2:
11330 /* LD1 {Vd.4s}, addr, #16 */
11331 /* LD1 {Vd.2s}, addr, #8 */
11332 for (i = 0; i < (all ? 4 : 2); i++)
11333 aarch64_set_vec_u32 (cpu, vd, i,
11334 aarch64_get_mem_u32 (cpu, address + i * 4));
11335 return;
11336
11337 case 3:
11338 /* LD1 {Vd.2d}, addr, #16 */
11339 /* LD1 {Vd.1d}, addr, #8 */
11340 for (i = 0; i < (all ? 2 : 1); i++)
11341 aarch64_set_vec_u64 (cpu, vd, i,
11342 aarch64_get_mem_u64 (cpu, address + i * 8));
11343 return;
11344 }
11345 }
11346
11347 /* Load multiple 1-element structures into two registers. */
11348 static void
11349 LD1_2 (sim_cpu *cpu, uint64_t address)
11350 {
11351 /* FIXME: This uses the same consecutive-fill algorithm as the LD2
11352 version, but the architecture requires LD2 to de-interleave its
11353 elements across the two registers; only the LD1 semantics are correct here. */
11354 vec_load (cpu, address, 2);
11355 }
11356
11357 /* Load multiple 1-element structures into three registers. */
11358 static void
11359 LD1_3 (sim_cpu *cpu, uint64_t address)
11360 {
11361 /* FIXME: This uses the same consecutive-fill algorithm as the LD3
11362 version, but the architecture requires LD3 to de-interleave its
11363 elements across the three registers; only the LD1 semantics are correct here. */
11364 vec_load (cpu, address, 3);
11365 }
11366
11367 /* Load multiple 1-element structures into four registers. */
11368 static void
11369 LD1_4 (sim_cpu *cpu, uint64_t address)
11370 {
11371 /* FIXME: This uses the same consecutive-fill algorithm as the LD4
11372 version, but the architecture requires LD4 to de-interleave its
11373 elements across the four registers; only the LD1 semantics are correct here. */
11374 vec_load (cpu, address, 4);
11375 }
11376
11377 /* Store multiple N-element structures to N consecutive registers. */
11378 static void
11379 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11380 {
11381 int all = INSTR (30, 30);
11382 unsigned size = INSTR (11, 10);
11383 unsigned vd = INSTR (4, 0);
11384 unsigned i;
11385
11386 switch (size)
11387 {
11388 case 0: /* 8-bit operations. */
11389 if (all)
11390 for (i = 0; i < (16 * N); i++)
11391 aarch64_set_mem_u8
11392 (cpu, address + i,
11393 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11394 else
11395 for (i = 0; i < (8 * N); i++)
11396 aarch64_set_mem_u8
11397 (cpu, address + i,
11398 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11399 return;
11400
11401 case 1: /* 16-bit operations. */
11402 if (all)
11403 for (i = 0; i < (8 * N); i++)
11404 aarch64_set_mem_u16
11405 (cpu, address + i * 2,
11406 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11407 else
11408 for (i = 0; i < (4 * N); i++)
11409 aarch64_set_mem_u16
11410 (cpu, address + i * 2,
11411 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11412 return;
11413
11414 case 2: /* 32-bit operations. */
11415 if (all)
11416 for (i = 0; i < (4 * N); i++)
11417 aarch64_set_mem_u32
11418 (cpu, address + i * 4,
11419 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11420 else
11421 for (i = 0; i < (2 * N); i++)
11422 aarch64_set_mem_u32
11423 (cpu, address + i * 4,
11424 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11425 return;
11426
11427 case 3: /* 64-bit operations. */
11428 if (all)
11429 for (i = 0; i < (2 * N); i++)
11430 aarch64_set_mem_u64
11431 (cpu, address + i * 8,
11432 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11433 else
11434 for (i = 0; i < N; i++)
11435 aarch64_set_mem_u64
11436 (cpu, address + i * 8,
11437 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11438 return;
11439 }
11440 }
11441
11442 /* Store multiple 4-element structure to four consecutive registers. */
11443 static void
11444 ST4 (sim_cpu *cpu, uint64_t address)
11445 {
11446 vec_store (cpu, address, 4);
11447 }
11448
11449 /* Store multiple 3-element structures to three consecutive registers. */
11450 static void
11451 ST3 (sim_cpu *cpu, uint64_t address)
11452 {
11453 vec_store (cpu, address, 3);
11454 }
11455
11456 /* Store multiple 2-element structures to two consecutive registers. */
11457 static void
11458 ST2 (sim_cpu *cpu, uint64_t address)
11459 {
11460 vec_store (cpu, address, 2);
11461 }
11462
11463 /* Store multiple 1-element structures into one register. */
11464 static void
11465 ST1_1 (sim_cpu *cpu, uint64_t address)
11466 {
11467 int all = INSTR (30, 30);
11468 unsigned size = INSTR (11, 10);
11469 unsigned vd = INSTR (4, 0);
11470 unsigned i;
11471
11472 switch (size)
11473 {
11474 case 0:
11475 for (i = 0; i < (all ? 16 : 8); i++)
11476 aarch64_set_mem_u8 (cpu, address + i,
11477 aarch64_get_vec_u8 (cpu, vd, i));
11478 return;
11479
11480 case 1:
11481 for (i = 0; i < (all ? 8 : 4); i++)
11482 aarch64_set_mem_u16 (cpu, address + i * 2,
11483 aarch64_get_vec_u16 (cpu, vd, i));
11484 return;
11485
11486 case 2:
11487 for (i = 0; i < (all ? 4 : 2); i++)
11488 aarch64_set_mem_u32 (cpu, address + i * 4,
11489 aarch64_get_vec_u32 (cpu, vd, i));
11490 return;
11491
11492 case 3:
11493 for (i = 0; i < (all ? 2 : 1); i++)
11494 aarch64_set_mem_u64 (cpu, address + i * 8,
11495 aarch64_get_vec_u64 (cpu, vd, i));
11496 return;
11497 }
11498 }
11499
11500 /* Store multiple 1-element structures into two registers. */
11501 static void
11502 ST1_2 (sim_cpu *cpu, uint64_t address)
11503 {
11504 /* FIXME: This uses the same consecutive-fill algorithm as the ST2
11505 version, but the architecture requires ST2 to interleave the
11506 elements of the two registers; only the ST1 semantics are correct here. */
11507 vec_store (cpu, address, 2);
11508 }
11509
11510 /* Store multiple 1-element structures into three registers. */
11511 static void
11512 ST1_3 (sim_cpu *cpu, uint64_t address)
11513 {
11514 /* FIXME: This uses the same consecutive-fill algorithm as the ST3
11515 version, but the architecture requires ST3 to interleave the
11516 elements of the three registers; only the ST1 semantics are correct here. */
11517 vec_store (cpu, address, 3);
11518 }
11519
11520 /* Store multiple 1-element structures into four registers. */
11521 static void
11522 ST1_4 (sim_cpu *cpu, uint64_t address)
11523 {
11524 /* FIXME: This uses the same consecutive-fill algorithm as the ST4
11525 version, but the architecture requires ST4 to interleave the
11526 elements of the four registers; only the ST1 semantics are correct here. */
11527 vec_store (cpu, address, 4);
11528 }
11529
11530 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11531 do \
11532 { \
11533 switch (INSTR (15, 14)) \
11534 { \
11535 case 0: \
11536 lane = (full << 3) | (s << 2) | size; \
11537 size = 0; \
11538 break; \
11539 \
11540 case 1: \
11541 if ((size & 1) == 1) \
11542 HALT_UNALLOC; \
11543 lane = (full << 2) | (s << 1) | (size >> 1); \
11544 size = 1; \
11545 break; \
11546 \
11547 case 2: \
11548 if ((size & 2) == 2) \
11549 HALT_UNALLOC; \
11550 \
11551 if ((size & 1) == 0) \
11552 { \
11553 lane = (full << 1) | s; \
11554 size = 2; \
11555 } \
11556 else \
11557 { \
11558 if (s) \
11559 HALT_UNALLOC; \
11560 lane = full; \
11561 size = 3; \
11562 } \
11563 break; \
11564 \
11565 default: \
11566 HALT_UNALLOC; \
11567 } \
11568 } \
11569 while (0)
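/* A worked example (editorial addition): for instr[15,14] = 2 with
   size = 0b00, s = 1 and full = 1 the macro yields
   lane = (1 << 1) | 1 = 3 and size = 2, i.e. the access targets 32-bit
   element 3, as in LD1 {V0.S}[3], [X0].  */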
11570
11571 /* Load single structure into one lane of N registers. */
11572 static void
11573 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11574 {
11575 /* instr[31] = 0
11576 instr[30] = element selector 0=>half, 1=>all elements
11577 instr[29,24] = 00 1101
11578 instr[23] = 0=>simple, 1=>post
11579 instr[22] = 1
11580 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11581 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11582 11111 (immediate post inc)
11583 instr[15,13] = opcode
11584 instr[12] = S, used for lane number
11585 instr[11,10] = size, also used for lane number
11586 instr[9,5] = address
11587 instr[4,0] = Vd */
11588
11589 unsigned full = INSTR (30, 30);
11590 unsigned vd = INSTR (4, 0);
11591 unsigned size = INSTR (11, 10);
11592 unsigned s = INSTR (12, 12);
11593 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11594 int lane = 0;
11595 int i;
11596
11597 NYI_assert (29, 24, 0x0D);
11598 NYI_assert (22, 22, 1);
11599
11600 /* Compute the lane number first (using size), and then compute size. */
11601 LDn_STn_SINGLE_LANE_AND_SIZE ();
11602
11603 for (i = 0; i < nregs; i++)
11604 switch (size)
11605 {
11606 case 0:
11607 {
11608 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11609 aarch64_set_vec_u8 (cpu, vec_reg (vd, i), lane, val);
11610 break;
11611 }
11612
11613 case 1:
11614 {
11615 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11616 aarch64_set_vec_u16 (cpu, vec_reg (vd, i), lane, val);
11617 break;
11618 }
11619
11620 case 2:
11621 {
11622 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11623 aarch64_set_vec_u32 (cpu, vec_reg (vd, i), lane, val);
11624 break;
11625 }
11626
11627 case 3:
11628 {
11629 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11630 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), lane, val);
11631 break;
11632 }
11633 }
11634 }
11635
11636 /* Store single structure from one lane from N registers. */
11637 static void
11638 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11639 {
11640 /* instr[31] = 0
11641 instr[30] = element selector 0=>half, 1=>all elements
11642 instr[29,24] = 00 1101
11643 instr[23] = 0=>simple, 1=>post
11644 instr[22] = 0
11645 instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11646 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11647 11111 (immediate post inc)
11648 instr[15,13] = opcode
11649 instr[12] = S, used for lane number
11650 instr[11,10] = size, also used for lane number
11651 instr[9,5] = address
11652 instr[4,0] = Vd */
11653
11654 unsigned full = INSTR (30, 30);
11655 unsigned vd = INSTR (4, 0);
11656 unsigned size = INSTR (11, 10);
11657 unsigned s = INSTR (12, 12);
11658 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11659 int lane = 0;
11660 int i;
11661
11662 NYI_assert (29, 24, 0x0D);
11663 NYI_assert (22, 22, 0);
11664
11665 /* Compute the lane number first (using size), and then compute size. */
11666 LDn_STn_SINGLE_LANE_AND_SIZE ();
11667
11668 for (i = 0; i < nregs; i++)
11669 switch (size)
11670 {
11671 case 0:
11672 {
11673 uint8_t val = aarch64_get_vec_u8 (cpu, vec_reg (vd, i), lane);
11674 aarch64_set_mem_u8 (cpu, address + i, val);
11675 break;
11676 }
11677
11678 case 1:
11679 {
11680 uint16_t val = aarch64_get_vec_u16 (cpu, vec_reg (vd, i), lane);
11681 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11682 break;
11683 }
11684
11685 case 2:
11686 {
11687 uint32_t val = aarch64_get_vec_u32 (cpu, vec_reg (vd, i), lane);
11688 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11689 break;
11690 }
11691
11692 case 3:
11693 {
11694 uint64_t val = aarch64_get_vec_u64 (cpu, vec_reg (vd, i), lane);
11695 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11696 break;
11697 }
11698 }
11699 }
11700
11701 /* Load single structure into all lanes of N registers. */
11702 static void
11703 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11704 {
11705 /* instr[31] = 0
11706 instr[30] = element selector 0=>half, 1=>all elements
11707 instr[29,24] = 00 1101
11708 instr[23] = 0=>simple, 1=>post
11709 instr[22] = 1
11710 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11711 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11712 11111 (immediate post inc)
11713 instr[15,14] = 11
11714 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11715 instr[12] = 0
11716 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11717 10=> word(s), 11=> double(d)
11718 instr[9,5] = address
11719 instr[4,0] = Vd */
11720
11721 unsigned full = INSTR (30, 30);
11722 unsigned vd = INSTR (4, 0);
11723 unsigned size = INSTR (11, 10);
11724 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11725 int i, n;
11726
11727 NYI_assert (29, 24, 0x0D);
11728 NYI_assert (22, 22, 1);
11729 NYI_assert (15, 14, 3);
11730 NYI_assert (12, 12, 0);
11731
11732 for (n = 0; n < nregs; n++)
11733 switch (size)
11734 {
11735 case 0:
11736 {
11737 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11738 for (i = 0; i < (full ? 16 : 8); i++)
11739 aarch64_set_vec_u8 (cpu, vec_reg (vd, n), i, val);
11740 break;
11741 }
11742
11743 case 1:
11744 {
11745 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11746 for (i = 0; i < (full ? 8 : 4); i++)
11747 aarch64_set_vec_u16 (cpu, vec_reg (vd, n), i, val);
11748 break;
11749 }
11750
11751 case 2:
11752 {
11753 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11754 for (i = 0; i < (full ? 4 : 2); i++)
11755 aarch64_set_vec_u32 (cpu, vec_reg (vd, n), i, val);
11756 break;
11757 }
11758
11759 case 3:
11760 {
11761 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11762 for (i = 0; i < (full ? 2 : 1); i++)
11763 aarch64_set_vec_u64 (cpu, vec_reg (vd, n), i, val);
11764 break;
11765 }
11766
11767 default:
11768 HALT_UNALLOC;
11769 }
11770 }
11771
11772 static void
11773 do_vec_load_store (sim_cpu *cpu)
11774 {
11775 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11776
11777 instr[31] = 0
11778 instr[30] = element selector 0=>half, 1=>all elements
11779 instr[29,25] = 00110
11780 instr[24] = 0=>multiple struct, 1=>single struct
11781 instr[23] = 0=>simple, 1=>post
11782 instr[22] = 0=>store, 1=>load
11783 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11784 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11785 11111 (immediate post inc)
11786 instr[15,12] = elements and destinations. eg for load:
11787 0000=>LD4 => load multiple 4-element to
11788 four consecutive registers
11789 0100=>LD3 => load multiple 3-element to
11790 three consecutive registers
11791 1000=>LD2 => load multiple 2-element to
11792 two consecutive registers
11793 0010=>LD1 => load multiple 1-element to
11794 four consecutive registers
11795 0110=>LD1 => load multiple 1-element to
11796 three consecutive registers
11797 1010=>LD1 => load multiple 1-element to
11798 two consecutive registers
11799 0111=>LD1 => load multiple 1-element to
11800 one register
11801 1100=>LD1R,LD2R
11802 1110=>LD3R,LD4R
11803 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11804 10=> word(s), 11=> double(d)
11805 instr[9,5] = Vn, can be SP
11806 instr[4,0] = Vd */
11807
11808 int single;
11809 int post;
11810 int load;
11811 unsigned vn;
11812 uint64_t address;
11813 int type;
11814
11815 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11816 HALT_NYI;
11817
11818 single = INSTR (24, 24);
11819 post = INSTR (23, 23);
11820 load = INSTR (22, 22);
11821 type = INSTR (15, 12);
11822 vn = INSTR (9, 5);
11823 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11824
11825 if (! single && INSTR (21, 21) != 0)
11826 HALT_UNALLOC;
11827
11828 if (post)
11829 {
11830 unsigned vm = INSTR (20, 16);
11831
11832 if (vm == R31)
11833 {
11834 unsigned sizeof_operation;
11835
11836 if (single)
11837 {
11838 if ((type >= 0) && (type <= 11))
11839 {
11840 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11841 switch (INSTR (15, 14))
11842 {
11843 case 0:
11844 sizeof_operation = nregs * 1;
11845 break;
11846 case 1:
11847 sizeof_operation = nregs * 2;
11848 break;
11849 case 2:
11850 if (INSTR (10, 10) == 0)
11851 sizeof_operation = nregs * 4;
11852 else
11853 sizeof_operation = nregs * 8;
11854 break;
11855 default:
11856 HALT_UNALLOC;
11857 }
11858 }
11859 else if (type == 0xC)
11860 {
11861 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11862 sizeof_operation <<= INSTR (11, 10);
11863 }
11864 else if (type == 0xE)
11865 {
11866 sizeof_operation = INSTR (21, 21) ? 4 : 3;
11867 sizeof_operation <<= INSTR (11, 10);
11868 }
11869 else
11870 HALT_UNALLOC;
11871 }
11872 else
11873 {
11874 switch (type)
11875 {
11876 case 0: sizeof_operation = 32; break;
11877 case 4: sizeof_operation = 24; break;
11878 case 8: sizeof_operation = 16; break;
11879
11880 case 7:
11881 /* One register, immediate offset variant. */
11882 sizeof_operation = 8;
11883 break;
11884
11885 case 10:
11886 /* Two registers, immediate offset variant. */
11887 sizeof_operation = 16;
11888 break;
11889
11890 case 6:
11891 /* Three registers, immediate offset variant. */
11892 sizeof_operation = 24;
11893 break;
11894
11895 case 2:
11896 /* Four registers, immediate offset variant. */
11897 sizeof_operation = 32;
11898 break;
11899
11900 default:
11901 HALT_UNALLOC;
11902 }
11903
11904 if (INSTR (30, 30))
11905 sizeof_operation *= 2;
11906 }
11907
11908 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11909 }
11910 else
11911 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11912 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11913 }
11914 else
11915 {
11916 NYI_assert (20, 16, 0);
11917 }
11918
11919 if (single)
11920 {
11921 if (load)
11922 {
11923 if ((type >= 0) && (type <= 11))
11924 do_vec_LDn_single (cpu, address);
11925 else if ((type == 0xC) || (type == 0xE))
11926 do_vec_LDnR (cpu, address);
11927 else
11928 HALT_UNALLOC;
11929 return;
11930 }
11931
11932 /* Stores. */
11933 if ((type >= 0) && (type <= 11))
11934 {
11935 do_vec_STn_single (cpu, address);
11936 return;
11937 }
11938
11939 HALT_UNALLOC;
11940 }
11941
11942 if (load)
11943 {
11944 switch (type)
11945 {
11946 case 0: LD4 (cpu, address); return;
11947 case 4: LD3 (cpu, address); return;
11948 case 8: LD2 (cpu, address); return;
11949 case 2: LD1_4 (cpu, address); return;
11950 case 6: LD1_3 (cpu, address); return;
11951 case 10: LD1_2 (cpu, address); return;
11952 case 7: LD1_1 (cpu, address); return;
11953
11954 default:
11955 HALT_UNALLOC;
11956 }
11957 }
11958
11959 /* Stores. */
11960 switch (type)
11961 {
11962 case 0: ST4 (cpu, address); return;
11963 case 4: ST3 (cpu, address); return;
11964 case 8: ST2 (cpu, address); return;
11965 case 2: ST1_4 (cpu, address); return;
11966 case 6: ST1_3 (cpu, address); return;
11967 case 10: ST1_2 (cpu, address); return;
11968 case 7: ST1_1 (cpu, address); return;
11969 default:
11970 HALT_UNALLOC;
11971 }
11972 }
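/* A worked example (editorial addition): 0x4c407000 is
   LD1 {V0.16B}, [X0].  Here single = 0, post = 0, load = 1 and
   type = 0b0111, so the switch above dispatches to LD1_1, and
   instr[30] = 1 selects the full 16-byte form.  */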
11973
11974 static void
11975 dexLdSt (sim_cpu *cpu)
11976 {
11977 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11978 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11979 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11980 bits [29,28:26] of a LS are the secondary dispatch vector. */
11981 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11982
11983 switch (group2)
11984 {
11985 case LS_EXCL_000:
11986 dexLoadExclusive (cpu); return;
11987
11988 case LS_LIT_010:
11989 case LS_LIT_011:
11990 dexLoadLiteral (cpu); return;
11991
11992 case LS_OTHER_110:
11993 case LS_OTHER_111:
11994 dexLoadOther (cpu); return;
11995
11996 case LS_ADVSIMD_001:
11997 do_vec_load_store (cpu); return;
11998
11999 case LS_PAIR_100:
12000 dex_load_store_pair_gr (cpu); return;
12001
12002 case LS_PAIR_101:
12003 dex_load_store_pair_fp (cpu); return;
12004
12005 default:
12006 /* Should never reach here. */
12007 HALT_NYI;
12008 }
12009 }
12010
12011 /* Specific decode and execute for group Data Processing Register. */
12012
12013 static void
12014 dexLogicalShiftedRegister (sim_cpu *cpu)
12015 {
12016 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12017 instr[30,29] = op
12018 instr[28,24] = 01010
12019 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12020 instr[21] = N
12021 instr[20,16] = Rm
12022 instr[15,10] = count : must be 0xxxxx for 32 bit
12023 instr[9,5] = Rn
12024 instr[4,0] = Rd */
12025
12026 uint32_t size = INSTR (31, 31);
12027 Shift shiftType = INSTR (23, 22);
12028 uint32_t count = INSTR (15, 10);
12029
12030 /* 32 bit operations must have count[5] = 0,
12031 otherwise the instruction is UNALLOC. */
12032 if (size == 0 && uimm (count, 5, 5))
12033 HALT_UNALLOC;
12034
12035 /* Dispatch on size:op:N. */
12036 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12037 {
12038 case 0: and32_shift (cpu, shiftType, count); return;
12039 case 1: bic32_shift (cpu, shiftType, count); return;
12040 case 2: orr32_shift (cpu, shiftType, count); return;
12041 case 3: orn32_shift (cpu, shiftType, count); return;
12042 case 4: eor32_shift (cpu, shiftType, count); return;
12043 case 5: eon32_shift (cpu, shiftType, count); return;
12044 case 6: ands32_shift (cpu, shiftType, count); return;
12045 case 7: bics32_shift (cpu, shiftType, count); return;
12046 case 8: and64_shift (cpu, shiftType, count); return;
12047 case 9: bic64_shift (cpu, shiftType, count); return;
12048 case 10: orr64_shift (cpu, shiftType, count); return;
12049 case 11: orn64_shift (cpu, shiftType, count); return;
12050 case 12: eor64_shift (cpu, shiftType, count); return;
12051 case 13: eon64_shift (cpu, shiftType, count); return;
12052 case 14: ands64_shift (cpu, shiftType, count); return;
12053 case 15: bics64_shift (cpu, shiftType, count); return;
12054 }
12055 }
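/* A worked example (editorial addition): 0xaa0103e0 is ORR X0, XZR, X1,
   the canonical MOV X0, X1.  size:op:N = (0b101 << 1) | 0 = 10, so
   orr64_shift runs with shiftType = LSL and count = 0.  */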
12056
12057 /* 32 bit conditional select. */
12058 static void
12059 csel32 (sim_cpu *cpu, CondCode cc)
12060 {
12061 unsigned rm = INSTR (20, 16);
12062 unsigned rn = INSTR (9, 5);
12063 unsigned rd = INSTR (4, 0);
12064
12065 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12066 testConditionCode (cpu, cc)
12067 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12068 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12069 }
12070
12071 /* 64 bit conditional select. */
12072 static void
12073 csel64 (sim_cpu *cpu, CondCode cc)
12074 {
12075 unsigned rm = INSTR (20, 16);
12076 unsigned rn = INSTR (9, 5);
12077 unsigned rd = INSTR (4, 0);
12078
12079 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12080 testConditionCode (cpu, cc)
12081 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12082 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12083 }
12084
12085 /* 32 bit conditional increment. */
12086 static void
12087 csinc32 (sim_cpu *cpu, CondCode cc)
12088 {
12089 unsigned rm = INSTR (20, 16);
12090 unsigned rn = INSTR (9, 5);
12091 unsigned rd = INSTR (4, 0);
12092
12093 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12094 testConditionCode (cpu, cc)
12095 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12096 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12097 }
12098
12099 /* 64 bit conditional increment. */
12100 static void
12101 csinc64 (sim_cpu *cpu, CondCode cc)
12102 {
12103 unsigned rm = INSTR (20, 16);
12104 unsigned rn = INSTR (9, 5);
12105 unsigned rd = INSTR (4, 0);
12106
12107 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12108 testConditionCode (cpu, cc)
12109 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12110 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12111 }
12112
12113 /* 32 bit conditional invert. */
12114 static void
12115 csinv32 (sim_cpu *cpu, CondCode cc)
12116 {
12117 unsigned rm = INSTR (20, 16);
12118 unsigned rn = INSTR (9, 5);
12119 unsigned rd = INSTR (4, 0);
12120
12121 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12122 testConditionCode (cpu, cc)
12123 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12124 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12125 }
12126
12127 /* 64 bit conditional invert. */
12128 static void
12129 csinv64 (sim_cpu *cpu, CondCode cc)
12130 {
12131 unsigned rm = INSTR (20, 16);
12132 unsigned rn = INSTR (9, 5);
12133 unsigned rd = INSTR (4, 0);
12134
12135 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12136 testConditionCode (cpu, cc)
12137 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12138 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12139 }
12140
12141 /* 32 bit conditional negate. */
12142 static void
12143 csneg32 (sim_cpu *cpu, CondCode cc)
12144 {
12145 unsigned rm = INSTR (20, 16);
12146 unsigned rn = INSTR (9, 5);
12147 unsigned rd = INSTR (4, 0);
12148
12149 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12150 testConditionCode (cpu, cc)
12151 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12152 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12153 }
12154
12155 /* 64 bit conditional negate. */
12156 static void
12157 csneg64 (sim_cpu *cpu, CondCode cc)
12158 {
12159 unsigned rm = INSTR (20, 16);
12160 unsigned rn = INSTR (9, 5);
12161 unsigned rd = INSTR (4, 0);
12162
12163 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12164 testConditionCode (cpu, cc)
12165 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12166 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12167 }
12168
12169 static void
12170 dexCondSelect (sim_cpu *cpu)
12171 {
12172 /* instr[28,21] = 11010100
12173 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12174 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12175 100 ==> CSINV, 101 ==> CSNEG,
12176 _1_ ==> UNALLOC
12177 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12178 instr[20,16] = Rm
12179 instr[15,12] = cond */
12180
12181 CondCode cc = INSTR (15, 12);
12182 uint32_t S = INSTR (29, 29);
12183 uint32_t op2 = INSTR (11, 10);
12184
12185 if (S == 1)
12186 HALT_UNALLOC;
12187
12188 if (op2 & 0x2)
12189 HALT_UNALLOC;
12190
12191 switch ((INSTR (31, 30) << 1) | op2)
12192 {
12193 case 0: csel32 (cpu, cc); return;
12194 case 1: csinc32 (cpu, cc); return;
12195 case 2: csinv32 (cpu, cc); return;
12196 case 3: csneg32 (cpu, cc); return;
12197 case 4: csel64 (cpu, cc); return;
12198 case 5: csinc64 (cpu, cc); return;
12199 case 6: csinv64 (cpu, cc); return;
12200 case 7: csneg64 (cpu, cc); return;
12201 }
12202 }
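/* A worked example (editorial addition): 0x1a9f17e0 is
   CSINC W0, WZR, WZR, NE, the canonical CSET W0, EQ.  The dispatch
   value (size:op << 1) | op2 = 1 selects csinc32: W0 gets WZR if NE
   holds and WZR + 1 otherwise, i.e. 1 exactly when the Z flag is
   set.  */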
12203
12204 /* Some helpers for counting leading 1 or 0 bits. */
12205
12206 /* Counts the number of leading bits which are the same
12207 in a 32 bit value; the result is in the range 1 to 32. */
12208 static uint32_t
12209 leading32 (uint32_t value)
12210 {
12211 int32_t mask = 0xffff0000;
12212 uint32_t count = 16; /* Counts number of bits set in mask. */
12213 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12214 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12215
12216 while (lo + 1 < hi)
12217 {
12218 int32_t test = (value & mask);
12219
12220 if (test == 0 || test == mask)
12221 {
12222 lo = count;
12223 count = (lo + hi) / 2;
12224 mask >>= (count - lo);
12225 }
12226 else
12227 {
12228 hi = count;
12229 count = (lo + hi) / 2;
12230 mask <<= hi - count;
12231 }
12232 }
12233
12234 if (lo != hi)
12235 {
12236 int32_t test;
12237
12238 mask >>= 1;
12239 test = (value & mask);
12240
12241 if (test == 0 || test == mask)
12242 count = hi;
12243 else
12244 count = lo;
12245 }
12246
12247 return count;
12248 }
12249
12250 /* Counts the number of leading bits which are the same
12251 in a 64 bit value; the result is in the range 1 to 64. */
12252 static uint64_t
12253 leading64 (uint64_t value)
12254 {
12255 int64_t mask = 0xffffffff00000000LL;
12256 uint64_t count = 32; /* Counts number of bits set in mask. */
12257 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12258 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12259
12260 while (lo + 1 < hi)
12261 {
12262 int64_t test = (value & mask);
12263
12264 if (test == 0 || test == mask)
12265 {
12266 lo = count;
12267 count = (lo + hi) / 2;
12268 mask >>= (count - lo);
12269 }
12270 else
12271 {
12272 hi = count;
12273 count = (lo + hi) / 2;
12274 mask <<= hi - count;
12275 }
12276 }
12277
12278 if (lo != hi)
12279 {
12280 int64_t test;
12281
12282 mask >>= 1;
12283 test = (value & mask);
12284
12285 if (test == 0 || test == mask)
12286 count = hi;
12287 else
12288 count = lo;
12289 }
12290
12291 return count;
12292 }
12293
12294 /* Bit operations. */
12295 /* N.B register args may not be SP. */
12296
12297 /* 32 bit count leading sign bits. */
12298 static void
12299 cls32 (sim_cpu *cpu)
12300 {
12301 unsigned rn = INSTR (9, 5);
12302 unsigned rd = INSTR (4, 0);
12303
12304 /* N.B. the result needs to exclude the leading bit. */
12305 aarch64_set_reg_u64
12306 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12307 }
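/* A worked example (editorial addition): for Wn = 0xffff0000 there are
   sixteen identical leading bits, so leading32 returns 16 and CLS
   reports 15 sign bits beyond the top bit.  */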
12308
12309 /* 64 bit count leading sign bits. */
12310 static void
12311 cls64 (sim_cpu *cpu)
12312 {
12313 unsigned rn = INSTR (9, 5);
12314 unsigned rd = INSTR (4, 0);
12315
12316 /* N.B. the result needs to exclude the leading bit. */
12317 aarch64_set_reg_u64
12318 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12319 }
12320
12321 /* 32 bit count leading zero bits. */
12322 static void
12323 clz32 (sim_cpu *cpu)
12324 {
12325 unsigned rn = INSTR (9, 5);
12326 unsigned rd = INSTR (4, 0);
12327 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12328
12329 /* If the sign (top) bit is set then the count is 0. */
12330 if (pick32 (value, 31, 31))
12331 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12332 else
12333 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12334 }
12335
12336 /* 64 bit count leading zero bits. */
12337 static void
12338 clz64 (sim_cpu *cpu)
12339 {
12340 unsigned rn = INSTR (9, 5);
12341 unsigned rd = INSTR (4, 0);
12342 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12343
12344 /* If the sign (top) bit is set then the count is 0. */
12345 if (pick64 (value, 63, 63))
12346 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12347 else
12348 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12349 }
12350
12351 /* 32 bit reverse bits. */
12352 static void
12353 rbit32 (sim_cpu *cpu)
12354 {
12355 unsigned rn = INSTR (9, 5);
12356 unsigned rd = INSTR (4, 0);
12357 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12358 uint32_t result = 0;
12359 int i;
12360
12361 for (i = 0; i < 32; i++)
12362 {
12363 result <<= 1;
12364 result |= (value & 1);
12365 value >>= 1;
12366 }
12367 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12368 }
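
/* For example, rbit32 maps 0x00000001 to 0x80000000 and 0x0000ffff
   to 0xffff0000: bit i of the source becomes bit 31 - i of the
   result. */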
12369
12370 /* 64 bit reverse bits. */
12371 static void
12372 rbit64 (sim_cpu *cpu)
12373 {
12374 unsigned rn = INSTR (9, 5);
12375 unsigned rd = INSTR (4, 0);
12376 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12377 uint64_t result = 0;
12378 int i;
12379
12380 for (i = 0; i < 64; i++)
12381 {
12382 result <<= 1;
12383 result |= (value & 1UL);
12384 value >>= 1;
12385 }
12386 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12387 }
12388
12389 /* 32 bit reverse bytes. */
12390 static void
12391 rev32 (sim_cpu *cpu)
12392 {
12393 unsigned rn = INSTR (9, 5);
12394 unsigned rd = INSTR (4, 0);
12395 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12396 uint32_t result = 0;
12397 int i;
12398
12399 for (i = 0; i < 4; i++)
12400 {
12401 result <<= 8;
12402 result |= (value & 0xff);
12403 value >>= 8;
12404 }
12405 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12406 }
12407
12408 /* 64 bit reverse bytes. */
12409 static void
12410 rev64 (sim_cpu *cpu)
12411 {
12412 unsigned rn = INSTR (9, 5);
12413 unsigned rd = INSTR (4, 0);
12414 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12415 uint64_t result = 0;
12416 int i;
12417
12418 for (i = 0; i < 8; i++)
12419 {
12420 result <<= 8;
12421 result |= (value & 0xffULL);
12422 value >>= 8;
12423 }
12424 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12425 }
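
/* For example, rev64 maps 0x0102030405060708ULL to
   0x0807060504030201ULL -- a full 8 byte swap, as used for endian
   conversion. */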
12426
12427 /* 32 bit reverse shorts. */
12428 /* N.B. this reverses the order of the bytes in each half word. */
12429 static void
12430 revh32 (sim_cpu *cpu)
12431 {
12432 unsigned rn = INSTR (9, 5);
12433 unsigned rd = INSTR (4, 0);
12434 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12435 uint32_t result = 0;
12436 int i;
12437
12438 for (i = 0; i < 2; i++)
12439 {
12440 result <<= 8;
12441 result |= (value & 0x00ff00ff);
12442 value >>= 8;
12443 }
12444 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12445 }
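
/* For example, revh32 maps 0x11223344 to 0x22114433: the bytes of
   each 16 bit half are swapped but the two halves stay in place. */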
12446
12447 /* 64 bit reverse shorts. */
12448 /* N.B. this reverses the order of the bytes in each half word. */
12449 static void
12450 revh64 (sim_cpu *cpu)
12451 {
12452 unsigned rn = INSTR (9, 5);
12453 unsigned rd = INSTR (4, 0);
12454 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12455 uint64_t result = 0;
12456 int i;
12457
12458 for (i = 0; i < 2; i++)
12459 {
12460 result <<= 8;
12461 result |= (value & 0x00ff00ff00ff00ffULL);
12462 value >>= 8;
12463 }
12464 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12465 }
12466
12467 static void
12468 dexDataProc1Source (sim_cpu *cpu)
12469 {
12470 /* instr[30] = 1
12471 instr[28,21] = 11010110
12472 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12473 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12474 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12475 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12476 000010 ==> REV, 000011 ==> UNALLOC
12477 000100 ==> CLZ, 000101 ==> CLS
12478 ow ==> UNALLOC
12479 instr[9,5] = rn : may not be SP
12480 instr[4,0] = rd : may not be SP. */
12481
12482 uint32_t S = INSTR (29, 29);
12483 uint32_t opcode2 = INSTR (20, 16);
12484 uint32_t opcode = INSTR (15, 10);
12485 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12486
12487 if (S == 1)
12488 HALT_UNALLOC;
12489
12490 if (opcode2 != 0)
12491 HALT_UNALLOC;
12492
12493 if (opcode & 0x38)
12494 HALT_UNALLOC;
12495
12496 switch (dispatch)
12497 {
12498 case 0: rbit32 (cpu); return;
12499 case 1: revh32 (cpu); return;
12500 case 2: rev32 (cpu); return;
12501 case 4: clz32 (cpu); return;
12502 case 5: cls32 (cpu); return;
12503 case 8: rbit64 (cpu); return;
12504 case 9: revh64 (cpu); return;
12505 case 10: rev32 (cpu); return;
12506 case 11: rev64 (cpu); return;
12507 case 12: clz64 (cpu); return;
12508 case 13: cls64 (cpu); return;
12509 default: HALT_UNALLOC;
12510 }
12511 }
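
/* For example, CLZ Xd, Xn has size == 1 and opcode == 000100, so
   the dispatch value above is (1 << 3) | 4 == 12, selecting clz64. */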
12512
12513 /* Variable shift.
12514 Shifts by count supplied in register.
12515 N.B. register args may not be SP.
12516 These all use the shifted auxiliary function for
12517 simplicity and clarity. Writing the actual shift
12518 inline would avoid a branch and so be faster but
12519 would also necessitate getting signs right. */
12520
12521 /* 32 bit arithmetic shift right. */
12522 static void
12523 asrv32 (sim_cpu *cpu)
12524 {
12525 unsigned rm = INSTR (20, 16);
12526 unsigned rn = INSTR (9, 5);
12527 unsigned rd = INSTR (4, 0);
12528
12529 aarch64_set_reg_u64
12530 (cpu, rd, NO_SP,
12531 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12532 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12533 }
12534
12535 /* 64 bit arithmetic shift right. */
12536 static void
12537 asrv64 (sim_cpu *cpu)
12538 {
12539 unsigned rm = INSTR (20, 16);
12540 unsigned rn = INSTR (9, 5);
12541 unsigned rd = INSTR (4, 0);
12542
12543 aarch64_set_reg_u64
12544 (cpu, rd, NO_SP,
12545 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12546 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12547 }
12548
12549 /* 32 bit logical shift left. */
12550 static void
12551 lslv32 (sim_cpu *cpu)
12552 {
12553 unsigned rm = INSTR (20, 16);
12554 unsigned rn = INSTR (9, 5);
12555 unsigned rd = INSTR (4, 0);
12556
12557 aarch64_set_reg_u64
12558 (cpu, rd, NO_SP,
12559 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12560 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12561 }
12562
12563 /* 64 bit logical shift left. */
12564 static void
12565 lslv64 (sim_cpu *cpu)
12566 {
12567 unsigned rm = INSTR (20, 16);
12568 unsigned rn = INSTR (9, 5);
12569 unsigned rd = INSTR (4, 0);
12570
12571 aarch64_set_reg_u64
12572 (cpu, rd, NO_SP,
12573 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12574 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12575 }
12576
12577 /* 32 bit logical shift right. */
12578 static void
12579 lsrv32 (sim_cpu *cpu)
12580 {
12581 unsigned rm = INSTR (20, 16);
12582 unsigned rn = INSTR (9, 5);
12583 unsigned rd = INSTR (4, 0);
12584
12585 aarch64_set_reg_u64
12586 (cpu, rd, NO_SP,
12587 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12588 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12589 }
12590
12591 /* 64 bit logical shift right. */
12592 static void
12593 lsrv64 (sim_cpu *cpu)
12594 {
12595 unsigned rm = INSTR (20, 16);
12596 unsigned rn = INSTR (9, 5);
12597 unsigned rd = INSTR (4, 0);
12598
12599 aarch64_set_reg_u64
12600 (cpu, rd, NO_SP,
12601 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12602 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12603 }
12604
12605 /* 32 bit rotate right. */
12606 static void
12607 rorv32 (sim_cpu *cpu)
12608 {
12609 unsigned rm = INSTR (20, 16);
12610 unsigned rn = INSTR (9, 5);
12611 unsigned rd = INSTR (4, 0);
12612
12613 aarch64_set_reg_u64
12614 (cpu, rd, NO_SP,
12615 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12616 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12617 }
12618
12619 /* 64 bit rotate right. */
12620 static void
12621 rorv64 (sim_cpu *cpu)
12622 {
12623 unsigned rm = INSTR (20, 16);
12624 unsigned rn = INSTR (9, 5);
12625 unsigned rd = INSTR (4, 0);
12626
12627 aarch64_set_reg_u64
12628 (cpu, rd, NO_SP,
12629 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12630 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12631 }
12632
12633
12634 /* divide. */
12635
12636 /* 32 bit signed divide. */
12637 static void
12638 sdiv32 (sim_cpu *cpu)
12639 {
12640 unsigned rm = INSTR (20, 16);
12641 unsigned rn = INSTR (9, 5);
12642 unsigned rd = INSTR (4, 0);
12643 /* N.B. the pseudo-code does the divide using 64 bit data. */
12644 /* TODO : check that this rounds towards zero as required. */
12645 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12646 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12647
12648 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12649 divisor ? ((int32_t) (dividend / divisor)) : 0);
12650 }
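
/* A note on the TODO above: C integer division truncates towards
   zero (C99 6.5.5), so the expression above gives e.g. -7 / 2 == -3,
   matching the SDIV round-towards-zero requirement, and a zero
   divisor yields 0 as the architecture requires. */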
12651
12652 /* 64 bit signed divide. */
12653 static void
12654 sdiv64 (sim_cpu *cpu)
12655 {
12656 unsigned rm = INSTR (20, 16);
12657 unsigned rn = INSTR (9, 5);
12658 unsigned rd = INSTR (4, 0);
12659
12660 /* TODO : check that this rounds towards zero as required. */
12661 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12662
12663 aarch64_set_reg_s64
12664 (cpu, rd, NO_SP,
12665 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12666 }
12667
12668 /* 32 bit unsigned divide. */
12669 static void
12670 udiv32 (sim_cpu *cpu)
12671 {
12672 unsigned rm = INSTR (20, 16);
12673 unsigned rn = INSTR (9, 5);
12674 unsigned rd = INSTR (4, 0);
12675
12676 /* N.B. the pseudo-code does the divide using 64 bit data. */
12677 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12678 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12679
12680 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12681 divisor ? (uint32_t) (dividend / divisor) : 0);
12682 }
12683
12684 /* 64 bit unsigned divide. */
12685 static void
12686 udiv64 (sim_cpu *cpu)
12687 {
12688 unsigned rm = INSTR (20, 16);
12689 unsigned rn = INSTR (9, 5);
12690 unsigned rd = INSTR (4, 0);
12691
12692 /* TODO : check that this rounds towards zero as required. */
12693 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12694
12695 aarch64_set_reg_u64
12696 (cpu, rd, NO_SP,
12697 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12698 }
12699
12700 static void
12701 dexDataProc2Source (sim_cpu *cpu)
12702 {
12703 /* assert instr[30] == 0
12704 instr[28,21] == 11010110
12705 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12706 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12707 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12708 001000 ==> LSLV, 001001 ==> LSRV
12709 001010 ==> ASRV, 001011 ==> RORV
12710 ow ==> UNALLOC. */
12711
12712 uint32_t dispatch;
12713 uint32_t S = INSTR (29, 29);
12714 uint32_t opcode = INSTR (15, 10);
12715
12716 if (S == 1)
12717 HALT_UNALLOC;
12718
12719 if (opcode & 0x34)
12720 HALT_UNALLOC;
12721
12722 dispatch = ( (INSTR (31, 31) << 3)
12723 | (uimm (opcode, 3, 3) << 2)
12724 | uimm (opcode, 1, 0));
12725 switch (dispatch)
12726 {
12727 case 2: udiv32 (cpu); return;
12728 case 3: sdiv32 (cpu); return;
12729 case 4: lslv32 (cpu); return;
12730 case 5: lsrv32 (cpu); return;
12731 case 6: asrv32 (cpu); return;
12732 case 7: rorv32 (cpu); return;
12733 case 10: udiv64 (cpu); return;
12734 case 11: sdiv64 (cpu); return;
12735 case 12: lslv64 (cpu); return;
12736 case 13: lsrv64 (cpu); return;
12737 case 14: asrv64 (cpu); return;
12738 case 15: rorv64 (cpu); return;
12739 default: HALT_UNALLOC;
12740 }
12741 }
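
/* For example, UDIV Wd, Wn, Wm has size == 0 and opcode == 000010,
   giving dispatch == (0 << 3) | (0 << 2) | 2 == 2, which selects
   udiv32 above. */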
12742
12743
12744 /* Multiply. */
12745
12746 /* 32 bit multiply and add. */
12747 static void
12748 madd32 (sim_cpu *cpu)
12749 {
12750 unsigned rm = INSTR (20, 16);
12751 unsigned ra = INSTR (14, 10);
12752 unsigned rn = INSTR (9, 5);
12753 unsigned rd = INSTR (4, 0);
12754
12755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12756 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12757 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12758 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12759 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12760 }
12761
12762 /* 64 bit multiply and add. */
12763 static void
12764 madd64 (sim_cpu *cpu)
12765 {
12766 unsigned rm = INSTR (20, 16);
12767 unsigned ra = INSTR (14, 10);
12768 unsigned rn = INSTR (9, 5);
12769 unsigned rd = INSTR (4, 0);
12770
12771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12772 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12773 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12774 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12775 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12776 }
12777
12778 /* 32 bit multiply and sub. */
12779 static void
12780 msub32 (sim_cpu *cpu)
12781 {
12782 unsigned rm = INSTR (20, 16);
12783 unsigned ra = INSTR (14, 10);
12784 unsigned rn = INSTR (9, 5);
12785 unsigned rd = INSTR (4, 0);
12786
12787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12788 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12789 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12790 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12791 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12792 }
12793
12794 /* 64 bit multiply and sub. */
12795 static void
12796 msub64 (sim_cpu *cpu)
12797 {
12798 unsigned rm = INSTR (20, 16);
12799 unsigned ra = INSTR (14, 10);
12800 unsigned rn = INSTR (9, 5);
12801 unsigned rd = INSTR (4, 0);
12802
12803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12804 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12805 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12806 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12807 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12808 }
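
/* N.B. the multiply-add forms subsume the plain multiplies: MUL is
   the alias MADD rd, rn, rm, xzr and MNEG is MSUB rd, rn, rm, xzr,
   since reading register 31 with NO_SP yields the zero register,
   leaving rd == rn * rm (or its negation) above. */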
12809
12810 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12811 static void
12812 smaddl (sim_cpu *cpu)
12813 {
12814 unsigned rm = INSTR (20, 16);
12815 unsigned ra = INSTR (14, 10);
12816 unsigned rn = INSTR (9, 5);
12817 unsigned rd = INSTR (4, 0);
12818
12819 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12820 obtain a 64 bit product. */
12821 aarch64_set_reg_s64
12822 (cpu, rd, NO_SP,
12823 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12824 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12825 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12826 }
12827
12828 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12829 static void
12830 smsubl (sim_cpu *cpu)
12831 {
12832 unsigned rm = INSTR (20, 16);
12833 unsigned ra = INSTR (14, 10);
12834 unsigned rn = INSTR (9, 5);
12835 unsigned rd = INSTR (4, 0);
12836
12837 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12838 obtain a 64 bit product. */
12839 aarch64_set_reg_s64
12840 (cpu, rd, NO_SP,
12841 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12842 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12843 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12844 }
12845
12846 /* Integer Multiply/Divide. */
12847
12848 /* First some macros and a helper function. */
12849 /* Macros to test or access elements of 64 bit words. */
12850
12851 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12852 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12853 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12854 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12855 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12856 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12857
12858 /* Offset of sign bit in 64 bit signed integer. */
12859 #define SIGN_SHIFT_U64 63
12860 /* The sign bit itself -- also identifies the minimum negative int value. */
12861 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
12862 /* Return true if a 64 bit signed int presented as an unsigned int is the
12863 most negative value. */
12864 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12865 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12866    int has its sign bit set. */
12867 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12868 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12869 an unsigned int has its sign bit set or not. */
12870 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12871 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12872 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12873
12874 /* Multiply two 64 bit ints and return
12875    the hi 64 bits of the 128 bit product. */
12876
12877 static uint64_t
12878 mul64hi (uint64_t value1, uint64_t value2)
12879 {
12880 uint64_t resultmid1;
12881 uint64_t result;
12882 uint64_t value1_lo = lowWordToU64 (value1);
12883 uint64_t value1_hi = highWordToU64 (value1) ;
12884 uint64_t value2_lo = lowWordToU64 (value2);
12885 uint64_t value2_hi = highWordToU64 (value2);
12886
12887 /* Cross-multiply and collect results. */
12888 uint64_t xproductlo = value1_lo * value2_lo;
12889 uint64_t xproductmid1 = value1_lo * value2_hi;
12890 uint64_t xproductmid2 = value1_hi * value2_lo;
12891 uint64_t xproducthi = value1_hi * value2_hi;
12892 uint64_t carry = 0;
12893 /* Start accumulating 64 bit results. */
12894 /* Drop bottom half of lowest cross-product. */
12895 uint64_t resultmid = xproductlo >> 32;
12896 /* Add in middle products. */
12897 resultmid = resultmid + xproductmid1;
12898
12899 /* Check for overflow. */
12900 if (resultmid < xproductmid1)
12901 /* Carry over 1 into top cross-product. */
12902 carry++;
12903
12904 resultmid1 = resultmid + xproductmid2;
12905
12906 /* Check for overflow. */
12907 if (resultmid1 < xproductmid2)
12908 /* Carry over 1 into top cross-product. */
12909 carry++;
12910
12911 /* Drop lowest 32 bits of middle cross-product. */
12912 result = resultmid1 >> 32;
12913
12914 /* Add the top cross-product and any carry. */
12915 result += xproducthi + carry;
12916
12917 return result;
12918 }
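
/* A quick check of the cross-multiplication above:
   mul64hi (1ULL << 32, 1ULL << 32) has xproductlo, xproductmid1 and
   xproductmid2 all zero and xproducthi == 1, returning 1 -- the
   high half of the 128 bit product 2^64. */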
12919
12920 /* Signed multiply high, source, source2 :
12921 64 bit, dest <-- high 64-bit of result. */
12922 static void
12923 smulh (sim_cpu *cpu)
12924 {
12925 uint64_t result;
12926 unsigned rm = INSTR (20, 16);
12927 unsigned rn = INSTR (9, 5);
12928 unsigned rd = INSTR (4, 0);
12929 GReg ra = INSTR (14, 10);
12930 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12931 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12932
12933 if (ra != R31)
12934 HALT_UNALLOC;
12935
12936 /* Compute the high half of the unsigned 128 bit product with
12937 mul64hi and then correct it for the operand signs: viewing a
12938 negative operand as unsigned adds 2^64 to it, which adds
12939 value2 * 2^64 (resp. value1 * 2^64) to the product, so the
12940 signed high half is the unsigned high half minus value2
12941 and/or value1.
12942
12943 N.B. negating the operands, multiplying and negating the
12944 result would be off by one whenever the low half of the
12945 magnitude product is non-zero; e.g. SMULH (-1, 1) must yield
12946 -1, not -(mul64hi (1, 1)) == 0. */
12947 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12948 result = mul64hi (value1, value2);
12949
12950 if ((int64_t) value1 < 0)
12951 result -= value2;
12952
12953 if ((int64_t) value2 < 0)
12954 result -= value1;
12955
12956 aarch64_set_reg_s64 (cpu, rd, NO_SP, (int64_t) result);
12957 }
12969
12970 /* Unsigned multiply add long -- source, source2 :
12971 32 bit, source3 : 64 bit. */
12972 static void
12973 umaddl (sim_cpu *cpu)
12974 {
12975 unsigned rm = INSTR (20, 16);
12976 unsigned ra = INSTR (14, 10);
12977 unsigned rn = INSTR (9, 5);
12978 unsigned rd = INSTR (4, 0);
12979
12980 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12981 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12982 obtain a 64 bit product. */
12983 aarch64_set_reg_u64
12984 (cpu, rd, NO_SP,
12985 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12986 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12987 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12988 }
12989
12990 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12991 static void
12992 umsubl (sim_cpu *cpu)
12993 {
12994 unsigned rm = INSTR (20, 16);
12995 unsigned ra = INSTR (14, 10);
12996 unsigned rn = INSTR (9, 5);
12997 unsigned rd = INSTR (4, 0);
12998
12999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13000 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13001 obtain a 64 bit product. */
13002 aarch64_set_reg_u64
13003 (cpu, rd, NO_SP,
13004 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13005 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13006 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13007 }
13008
13009 /* Unsigned multiply high, source, source2 :
13010 64 bit, dest <-- high 64-bit of result. */
13011 static void
13012 umulh (sim_cpu *cpu)
13013 {
13014 unsigned rm = INSTR (20, 16);
13015 unsigned rn = INSTR (9, 5);
13016 unsigned rd = INSTR (4, 0);
13017 GReg ra = INSTR (14, 10);
13018
13019 if (ra != R31)
13020 HALT_UNALLOC;
13021
13022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13023 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13024 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13025 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13026 }
13027
13028 static void
13029 dexDataProc3Source (sim_cpu *cpu)
13030 {
13031 /* assert instr[28,24] == 11011. */
13032 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13033 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13034 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13035 instr[15] = o0 : 0/1 ==> ok
13036 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13037 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13038 0100 ==> SMULH, (64 bit only)
13039 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13040 1100 ==> UMULH (64 bit only)
13041 ow ==> UNALLOC. */
13042
13043 uint32_t dispatch;
13044 uint32_t size = INSTR (31, 31);
13045 uint32_t op54 = INSTR (30, 29);
13046 uint32_t op31 = INSTR (23, 21);
13047 uint32_t o0 = INSTR (15, 15);
13048
13049 if (op54 != 0)
13050 HALT_UNALLOC;
13051
13052 if (size == 0)
13053 {
13054 if (op31 != 0)
13055 HALT_UNALLOC;
13056
13057 if (o0 == 0)
13058 madd32 (cpu);
13059 else
13060 msub32 (cpu);
13061 return;
13062 }
13063
13064 dispatch = (op31 << 1) | o0;
13065
13066 switch (dispatch)
13067 {
13068 case 0: madd64 (cpu); return;
13069 case 1: msub64 (cpu); return;
13070 case 2: smaddl (cpu); return;
13071 case 3: smsubl (cpu); return;
13072 case 4: smulh (cpu); return;
13073 case 10: umaddl (cpu); return;
13074 case 11: umsubl (cpu); return;
13075 case 12: umulh (cpu); return;
13076 default: HALT_UNALLOC;
13077 }
13078 }
13079
13080 static void
13081 dexDPReg (sim_cpu *cpu)
13082 {
13083 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13084 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13085 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13086 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13087
13088 switch (group2)
13089 {
13090 case DPREG_LOG_000:
13091 case DPREG_LOG_001:
13092 dexLogicalShiftedRegister (cpu); return;
13093
13094 case DPREG_ADDSHF_010:
13095 dexAddSubtractShiftedRegister (cpu); return;
13096
13097 case DPREG_ADDEXT_011:
13098 dexAddSubtractExtendedRegister (cpu); return;
13099
13100 case DPREG_ADDCOND_100:
13101 {
13102 /* This set bundles a variety of different operations. */
13103 /* Check for: */
13104 /* 1) add/sub w carry. */
13105 uint32_t mask1 = 0x1FE00000U;
13106 uint32_t val1 = 0x1A000000U;
13107 /* 2) cond compare register/immediate. */
13108 uint32_t mask2 = 0x1FE00000U;
13109 uint32_t val2 = 0x1A400000U;
13110 /* 3) cond select. */
13111 uint32_t mask3 = 0x1FE00000U;
13112 uint32_t val3 = 0x1A800000U;
13113 /* 4) data proc 1/2 source. */
13114 uint32_t mask4 = 0x1FE00000U;
13115 uint32_t val4 = 0x1AC00000U;
13116
13117 if ((aarch64_get_instr (cpu) & mask1) == val1)
13118 dexAddSubtractWithCarry (cpu);
13119
13120 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13121 CondCompare (cpu);
13122
13123 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13124 dexCondSelect (cpu);
13125
13126 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13127 {
13128 /* Bit 30 is clear for data proc 2 source
13129 and set for data proc 1 source. */
13130 if (aarch64_get_instr (cpu) & (1U << 30))
13131 dexDataProc1Source (cpu);
13132 else
13133 dexDataProc2Source (cpu);
13134 }
13135
13136 else
13137 /* Should not reach here. */
13138 HALT_NYI;
13139
13140 return;
13141 }
13142
13143 case DPREG_3SRC_110:
13144 dexDataProc3Source (cpu); return;
13145
13146 case DPREG_UNALLOC_101:
13147 HALT_UNALLOC;
13148
13149 case DPREG_3SRC_111:
13150 dexDataProc3Source (cpu); return;
13151
13152 default:
13153 /* Should never reach here. */
13154 HALT_NYI;
13155 }
13156 }
13157
13158 /* Unconditional Branch immediate.
13159 Offset is a PC-relative byte offset in the range +/- 128MiB.
13160 The instruction holds a word offset which the decode routine is
13161 expected to scale to the byte offset passed in here. */
13162
13163 /* Unconditional branch. */
13164 static void
13165 buc (sim_cpu *cpu, int32_t offset)
13166 {
13167 aarch64_set_next_PC_by_offset (cpu, offset);
13168 }
13169
13170 static unsigned stack_depth = 0;
13171
13172 /* Unconditional branch and link -- writes return PC to LR. */
13173 static void
13174 bl (sim_cpu *cpu, int32_t offset)
13175 {
13176 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13177 aarch64_save_LR (cpu);
13178 aarch64_set_next_PC_by_offset (cpu, offset);
13179
13180 if (TRACE_BRANCH_P (cpu))
13181 {
13182 ++ stack_depth;
13183 TRACE_BRANCH (cpu,
13184 " %*scall %" PRIx64 " [%s]"
13185 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13186 stack_depth, " ", aarch64_get_next_PC (cpu),
13187 aarch64_get_func (CPU_STATE (cpu),
13188 aarch64_get_next_PC (cpu)),
13189 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13190 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13191 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13192 );
13193 }
13194 }
13195
13196 /* Unconditional Branch register.
13197 Branch/return address is in source register. */
13198
13199 /* Unconditional branch. */
13200 static void
13201 br (sim_cpu *cpu)
13202 {
13203 unsigned rn = INSTR (9, 5);
13204 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13205 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13206 }
13207
13208 /* Unconditional branch and link -- writes return PC to LR. */
13209 static void
13210 blr (sim_cpu *cpu)
13211 {
13212 unsigned rn = INSTR (9, 5);
13213
13214 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13215 /* The pseudo code in the spec says we update LR before fetching
13216 the value from rn. */
13217 aarch64_save_LR (cpu);
13218 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13219
13220 if (TRACE_BRANCH_P (cpu))
13221 {
13222 ++ stack_depth;
13223 TRACE_BRANCH (cpu,
13224 " %*scall %" PRIx64 " [%s]"
13225 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13226 stack_depth, " ", aarch64_get_next_PC (cpu),
13227 aarch64_get_func (CPU_STATE (cpu),
13228 aarch64_get_next_PC (cpu)),
13229 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13230 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13231 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13232 );
13233 }
13234 }
13235
13236 /* Return -- the assembler will default the source to LR.  This is
13237 functionally equivalent to br but, presumably, unlike br it side
13238 effects the branch predictor. */
13239 static void
13240 ret (sim_cpu *cpu)
13241 {
13242 unsigned rn = INSTR (9, 5);
13243 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13244
13245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13246 if (TRACE_BRANCH_P (cpu))
13247 {
13248 TRACE_BRANCH (cpu,
13249 " %*sreturn [result: %" PRIx64 "]",
13250 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13251 -- stack_depth;
13252 }
13253 }
13254
13255 /* NOP -- we implement this and call it from the decode in case we
13256 want to intercept it later. */
13257
13258 static void
13259 nop (sim_cpu *cpu)
13260 {
13261 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13262 }
13263
13264 /* Data synchronization barrier. */
13265
13266 static void
13267 dsb (sim_cpu *cpu)
13268 {
13269 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13270 }
13271
13272 /* Data memory barrier. */
13273
13274 static void
13275 dmb (sim_cpu *cpu)
13276 {
13277 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13278 }
13279
13280 /* Instruction synchronization barrier. */
13281
13282 static void
13283 isb (sim_cpu *cpu)
13284 {
13285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13286 }
13287
13288 static void
13289 dexBranchImmediate (sim_cpu *cpu)
13290 {
13291 /* assert instr[30,26] == 00101
13292 instr[31] ==> 0 == B, 1 == BL
13293 instr[25,0] == imm26 branch offset counted in words. */
13294
13295 uint32_t top = INSTR (31, 31);
13296 /* We have a 26 bit signed word offset which we need to pass to the
13297 execute routine as a signed byte offset. */
13298 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13299
13300 if (top)
13301 bl (cpu, offset);
13302 else
13303 buc (cpu, offset);
13304 }
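
/* For example, a branch back to the previous instruction (B .-4)
   encodes imm26 == 0x3ffffff; simm32 sign-extends this to -1 and
   the shift above scales it to the byte offset -4. */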
13305
13306 /* Control Flow. */
13307
13308 /* Conditional branch
13309
13310 Offset is a PC-relative byte offset in the range +/- 1MiB.  pos is
13311 a bit position in the range 0 .. 63.
13312
13313 cc is a CondCode enum value as pulled out of the decode
13314
13315 N.B. any offset register (source) can only be Xn or Wn. */
13316
13317 static void
13318 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13319 {
13320 /* The test returns TRUE if CC is met. */
13321 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13322 if (testConditionCode (cpu, cc))
13323 aarch64_set_next_PC_by_offset (cpu, offset);
13324 }
13325
13326 /* 32 bit branch on register non-zero. */
13327 static void
13328 cbnz32 (sim_cpu *cpu, int32_t offset)
13329 {
13330 unsigned rt = INSTR (4, 0);
13331
13332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13333 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13334 aarch64_set_next_PC_by_offset (cpu, offset);
13335 }
13336
13337 /* 64 bit branch on register non-zero. */
13338 static void
13339 cbnz (sim_cpu *cpu, int32_t offset)
13340 {
13341 unsigned rt = INSTR (4, 0);
13342
13343 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13344 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13345 aarch64_set_next_PC_by_offset (cpu, offset);
13346 }
13347
13348 /* 32 bit branch on register zero. */
13349 static void
13350 cbz32 (sim_cpu *cpu, int32_t offset)
13351 {
13352 unsigned rt = INSTR (4, 0);
13353
13354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13355 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13356 aarch64_set_next_PC_by_offset (cpu, offset);
13357 }
13358
13359 /* 64 bit branch on register zero. */
13360 static void
13361 cbz (sim_cpu *cpu, int32_t offset)
13362 {
13363 unsigned rt = INSTR (4, 0);
13364
13365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13366 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13367 aarch64_set_next_PC_by_offset (cpu, offset);
13368 }
13369
13370 /* Branch on register bit test non-zero -- one size fits all. */
13371 static void
13372 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13373 {
13374 unsigned rt = INSTR (4, 0);
13375
13376 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13377 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13378 aarch64_set_next_PC_by_offset (cpu, offset);
13379 }
13380
13381 /* Branch on register bit test zero -- one size fits all. */
13382 static void
13383 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13384 {
13385 unsigned rt = INSTR (4, 0);
13386
13387 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13388 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13389 aarch64_set_next_PC_by_offset (cpu, offset);
13390 }
13391
13392 static void
13393 dexCompareBranchImmediate (sim_cpu *cpu)
13394 {
13395 /* instr[30,25] = 01 1010
13396 instr[31] = size : 0 ==> 32, 1 ==> 64
13397 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13398 instr[23,5] = simm19 branch offset counted in words
13399 instr[4,0] = rt */
13400
13401 uint32_t size = INSTR (31, 31);
13402 uint32_t op = INSTR (24, 24);
13403 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13404
13405 if (size == 0)
13406 {
13407 if (op == 0)
13408 cbz32 (cpu, offset);
13409 else
13410 cbnz32 (cpu, offset);
13411 }
13412 else
13413 {
13414 if (op == 0)
13415 cbz (cpu, offset);
13416 else
13417 cbnz (cpu, offset);
13418 }
13419 }
13420
13421 static void
13422 dexTestBranchImmediate (sim_cpu *cpu)
13423 {
13424 /* instr[31] = b5 : bit 5 of test bit idx
13425 instr[30,25] = 01 1011
13426 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13427 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13428 instr[18,5] = simm14 : signed offset counted in words
13429 instr[4,0] = uimm5 */
13430
13431 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13432 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13433
13434 NYI_assert (30, 25, 0x1b);
13435
13436 if (INSTR (24, 24) == 0)
13437 tbz (cpu, pos, offset);
13438 else
13439 tbnz (cpu, pos, offset);
13440 }
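
/* For example, TBZ Xt, #40, label splits the bit index 40
   (0b101000) into b5 == 1 and b40 == 0b01000, which the composition
   above reassembles as pos == 40. */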
13441
13442 static void
13443 dexCondBranchImmediate (sim_cpu *cpu)
13444 {
13445 /* instr[31,25] = 010 1010
13446 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13447 instr[23,5] = simm19 : signed offset counted in words
13448 instr[4] = op0
13449 instr[3,0] = cond */
13450
13451 int32_t offset;
13452 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13453
13454 NYI_assert (31, 25, 0x2a);
13455
13456 if (op != 0)
13457 HALT_UNALLOC;
13458
13459 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13460
13461 bcc (cpu, offset, INSTR (3, 0));
13462 }
13463
13464 static void
13465 dexBranchRegister (sim_cpu *cpu)
13466 {
13467 /* instr[31,25] = 110 1011
13468 instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 4 => ERET, 5 => DRPS
13469 instr[20,16] = op2 : must be 11111
13470 instr[15,10] = op3 : must be 000000
13471 instr[4,0] = op4 : must be 00000. */
13472
13473 uint32_t op = INSTR (24, 21);
13474 uint32_t op2 = INSTR (20, 16);
13475 uint32_t op3 = INSTR (15, 10);
13476 uint32_t op4 = INSTR (4, 0);
13477
13478 NYI_assert (31, 25, 0x6b);
13479
13480 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13481 HALT_UNALLOC;
13482
13483 if (op == 0)
13484 br (cpu);
13485
13486 else if (op == 1)
13487 blr (cpu);
13488
13489 else if (op == 2)
13490 ret (cpu);
13491
13492 else
13493 {
13494 /* ERET and DRPS accept 0b11111 for rn = instr [9,5];
13495 anything else is unallocated. */
13496 uint32_t rn = INSTR (9, 5);
13497
13498 if (rn != 0x1f)
13499 HALT_UNALLOC;
13500
13501 if (op == 4 || op == 5)
13502 HALT_NYI;
13503
13504 HALT_UNALLOC;
13505 }
13506 }
13507
13508 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13509 but this may not be available. So instead we define the values we need
13510 here. */
13511 #define AngelSVC_Reason_Open 0x01
13512 #define AngelSVC_Reason_Close 0x02
13513 #define AngelSVC_Reason_Write 0x05
13514 #define AngelSVC_Reason_Read 0x06
13515 #define AngelSVC_Reason_IsTTY 0x09
13516 #define AngelSVC_Reason_Seek 0x0A
13517 #define AngelSVC_Reason_FLen 0x0C
13518 #define AngelSVC_Reason_Remove 0x0E
13519 #define AngelSVC_Reason_Rename 0x0F
13520 #define AngelSVC_Reason_Clock 0x10
13521 #define AngelSVC_Reason_Time 0x11
13522 #define AngelSVC_Reason_System 0x12
13523 #define AngelSVC_Reason_Errno 0x13
13524 #define AngelSVC_Reason_GetCmdLine 0x15
13525 #define AngelSVC_Reason_HeapInfo 0x16
13526 #define AngelSVC_Reason_ReportException 0x18
13527 #define AngelSVC_Reason_Elapsed 0x30
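
/* An Angel call is made by executing HLT #0xf000 with the reason
   code in w0 and, for most reasons, a pointer to a parameter block
   in x1; the result is returned in x0.  A minimal sketch of a
   caller (not part of the simulator):

       mov   w0, #0x18           // AngelSVC_Reason_ReportException
       adr   x1, parms           // parms holds type, state
       hlt   #0xf000  */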
13528
13529
13530 static void
13531 handle_halt (sim_cpu *cpu, uint32_t val)
13532 {
13533 uint64_t result = 0;
13534
13535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13536 if (val != 0xf000)
13537 {
13538 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13539 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13540 sim_stopped, SIM_SIGTRAP);
13541 }
13542
13543 /* We have encountered an Angel SVC call. See if we can process it. */
13544 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13545 {
13546 case AngelSVC_Reason_HeapInfo:
13547 {
13548 /* Get the values. */
13549 uint64_t stack_top = aarch64_get_stack_start (cpu);
13550 uint64_t heap_base = aarch64_get_heap_start (cpu);
13551
13552 /* Get the pointer. */
13553 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13554 ptr = aarch64_get_mem_u64 (cpu, ptr);
13555
13556 /* Fill in the memory block. */
13557 /* Start addr of heap. */
13558 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13559 /* End addr of heap. */
13560 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13561 /* Lowest stack addr. */
13562 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13563 /* Initial stack addr. */
13564 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13565
13566 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13567 }
13568 break;
13569
13570 case AngelSVC_Reason_Open:
13571 {
13572 /* Get the pointer. */
13573 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13574 /* FIXME: For now we just assume that we will only be asked
13575 to open the standard file descriptors. */
13576 static int fd = 0;
13577 result = fd ++;
13578
13579 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13580 }
13581 break;
13582
13583 case AngelSVC_Reason_Close:
13584 {
13585 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13586 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13587 result = 0;
13588 }
13589 break;
13590
13591 case AngelSVC_Reason_Errno:
13592 result = 0;
13593 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13594 break;
13595
13596 case AngelSVC_Reason_Clock:
13597 result =
13598 #ifdef CLOCKS_PER_SEC
13599 (CLOCKS_PER_SEC >= 100)
13600 ? (clock () / (CLOCKS_PER_SEC / 100))
13601 : ((clock () * 100) / CLOCKS_PER_SEC)
13602 #else
13603 /* Presume unix... clock() returns microseconds. */
13604 (clock () / 10000)
13605 #endif
13606 ;
13607 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13608 break;
13609
13610 case AngelSVC_Reason_GetCmdLine:
13611 {
13612 /* Get the pointer. */
13613 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13614 ptr = aarch64_get_mem_u64 (cpu, ptr);
13615
13616 /* FIXME: No command line for now. */
13617 aarch64_set_mem_u64 (cpu, ptr, 0);
13618 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13619 }
13620 break;
13621
13622 case AngelSVC_Reason_IsTTY:
13623 result = 1;
13624 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13625 break;
13626
13627 case AngelSVC_Reason_Write:
13628 {
13629 /* Get the pointer. */
13630 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13631 /* Get the write control block. */
13632 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13633 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13634 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13635
13636 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13637 PRIx64 " on descriptor %" PRIx64,
13638 len, buf, fd);
13639
13640 if (len > 1280)
13641 {
13642 TRACE_SYSCALL (cpu,
13643 " AngelSVC: Write: Suspiciously long write: %ld",
13644 (long) len);
13645 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13646 sim_stopped, SIM_SIGBUS);
13647 }
13648 else if (fd == 1)
13649 {
13650 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13651 }
13652 else if (fd == 2)
13653 {
13654 TRACE (cpu, 0, "\n");
13655 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13656 (int) len, aarch64_get_mem_ptr (cpu, buf));
13657 TRACE (cpu, 0, "\n");
13658 }
13659 else
13660 {
13661 TRACE_SYSCALL (cpu,
13662 " AngelSVC: Write: Unexpected file handle: %d",
13663 (int) fd);
13664 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13665 sim_stopped, SIM_SIGABRT);
13666 }
13667 }
13668 break;
13669
13670 case AngelSVC_Reason_ReportException:
13671 {
13672 /* Get the pointer. */
13673 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13674 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13675 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13676 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13677
13678 TRACE_SYSCALL (cpu,
13679 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13680 type, state);
13681
13682 if (type == 0x20026)
13683 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13684 sim_exited, state);
13685 else
13686 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13687 sim_stopped, SIM_SIGINT);
13688 }
13689 break;
13690
13691 case AngelSVC_Reason_Read:
13692 case AngelSVC_Reason_FLen:
13693 case AngelSVC_Reason_Seek:
13694 case AngelSVC_Reason_Remove:
13695 case AngelSVC_Reason_Time:
13696 case AngelSVC_Reason_System:
13697 case AngelSVC_Reason_Rename:
13698 case AngelSVC_Reason_Elapsed:
13699 default:
13700 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13701 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13702 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13703 sim_stopped, SIM_SIGTRAP);
13704 }
13705
13706 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13707 }
13708
13709 static void
13710 dexExcpnGen (sim_cpu *cpu)
13711 {
13712 /* instr[31:24] = 11010100
13713 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13714 010 ==> HLT, 101 ==> DBG GEN EXCPN
13715 instr[20,5] = imm16
13716 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13717 instr[1,0] = LL : discriminates opc */
13718
13719 uint32_t opc = INSTR (23, 21);
13720 uint32_t imm16 = INSTR (20, 5);
13721 uint32_t opc2 = INSTR (4, 2);
13722 uint32_t LL;
13723
13724 NYI_assert (31, 24, 0xd4);
13725
13726 if (opc2 != 0)
13727 HALT_UNALLOC;
13728
13729 LL = INSTR (1, 0);
13730
13731 /* We only implement HLT and BRK for now. */
13732 if (opc == 1 && LL == 0)
13733 {
13734 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13735 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13736 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13737 }
13738
13739 if (opc == 2 && LL == 0)
13740 handle_halt (cpu, imm16);
13741
13742 else if (opc == 0 || opc == 5)
13743 HALT_NYI;
13744
13745 else
13746 HALT_UNALLOC;
13747 }
13748
13749 /* Stub for accessing system registers. */
13750
13751 static uint64_t
13752 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13753 unsigned crm, unsigned op2)
13754 {
13755 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13756 /* DCZID_EL0 - the Data Cache Zero ID register.
13757 We do not support DC ZVA at the moment, so
13758 we return a value with the disable bit set.
13759 We implement support for the DCZID register since
13760 it is used by the C library's memset function. */
13761 return ((uint64_t) 1) << 4;
13762
13763 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13764 /* Cache Type Register. */
13765 return 0x80008000UL;
13766
13767 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13768 /* TPIDR_EL0 - thread pointer id. */
13769 return aarch64_get_thread_id (cpu);
13770
13771 if (op1 == 3 && crm == 4 && op2 == 0)
13772 return aarch64_get_FPCR (cpu);
13773
13774 if (op1 == 3 && crm == 4 && op2 == 1)
13775 return aarch64_get_FPSR (cpu);
13776
13777 else if (op1 == 3 && crm == 2 && op2 == 0)
13778 return aarch64_get_CPSR (cpu);
13779
13780 HALT_NYI;
13781 }
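
/* For example, MRS x0, DCZID_EL0 encodes op0 == 3, op1 == 3,
   CRn == 0, CRm == 0, op2 == 7, so system_get returns 0x10 --
   bit 4 is the DZP flag, which reports DC ZVA as disabled. */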
13782
13783 static void
13784 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13785 unsigned crm, unsigned op2, uint64_t val)
13786 {
13787 if (op1 == 3 && crm == 4 && op2 == 0)
13788 aarch64_set_FPCR (cpu, val);
13789
13790 else if (op1 == 3 && crm == 4 && op2 == 1)
13791 aarch64_set_FPSR (cpu, val);
13792
13793 else if (op1 == 3 && crm == 2 && op2 == 0)
13794 aarch64_set_CPSR (cpu, val);
13795
13796 else
13797 HALT_NYI;
13798 }
13799
13800 static void
13801 do_mrs (sim_cpu *cpu)
13802 {
13803 /* instr[31:20] = 1101 0101 0001 1
13804 instr[19] = op0
13805 instr[18,16] = op1
13806 instr[15,12] = CRn
13807 instr[11,8] = CRm
13808 instr[7,5] = op2
13809 instr[4,0] = Rt */
13810 unsigned sys_op0 = INSTR (19, 19) + 2;
13811 unsigned sys_op1 = INSTR (18, 16);
13812 unsigned sys_crn = INSTR (15, 12);
13813 unsigned sys_crm = INSTR (11, 8);
13814 unsigned sys_op2 = INSTR (7, 5);
13815 unsigned rt = INSTR (4, 0);
13816
13817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13818 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13819 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13820 }
13821
13822 static void
13823 do_MSR_immediate (sim_cpu *cpu)
13824 {
13825 /* instr[31:19] = 1101 0101 0000 0
13826 instr[18,16] = op1
13827 instr[15,12] = 0100
13828 instr[11,8] = CRm
13829 instr[7,5] = op2
13830 instr[4,0] = 1 1111 */
13831
13832 unsigned op1 = INSTR (18, 16);
13833 /*unsigned crm = INSTR (11, 8);*/
13834 unsigned op2 = INSTR (7, 5);
13835
13836 NYI_assert (31, 19, 0x1AA0);
13837 NYI_assert (15, 12, 0x4);
13838 NYI_assert (4, 0, 0x1F);
13839
13840 if (op1 == 0)
13841 {
13842 if (op2 == 5)
13843 HALT_NYI; /* set SPSel. */
13844 else
13845 HALT_UNALLOC;
13846 }
13847 else if (op1 == 3)
13848 {
13849 if (op2 == 6)
13850 HALT_NYI; /* set DAIFset. */
13851 else if (op2 == 7)
13852 HALT_NYI; /* set DAIFclr. */
13853 else
13854 HALT_UNALLOC;
13855 }
13856 else
13857 HALT_UNALLOC;
13858 }
13859
13860 static void
13861 do_MSR_reg (sim_cpu *cpu)
13862 {
13863 /* instr[31:20] = 1101 0101 0001
13864 instr[19] = op0
13865 instr[18,16] = op1
13866 instr[15,12] = CRn
13867 instr[11,8] = CRm
13868 instr[7,5] = op2
13869 instr[4,0] = Rt */
13870
13871 unsigned sys_op0 = INSTR (19, 19) + 2;
13872 unsigned sys_op1 = INSTR (18, 16);
13873 unsigned sys_crn = INSTR (15, 12);
13874 unsigned sys_crm = INSTR (11, 8);
13875 unsigned sys_op2 = INSTR (7, 5);
13876 unsigned rt = INSTR (4, 0);
13877
13878 NYI_assert (31, 20, 0xD51);
13879
13880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13881 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13882 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13883 }
13884
13885 static void
13886 do_SYS (sim_cpu *cpu)
13887 {
13888 /* instr[31,19] = 1101 0101 0000 1
13889 instr[18,16] = op1
13890 instr[15,12] = CRn
13891 instr[11,8] = CRm
13892 instr[7,5] = op2
13893 instr[4,0] = Rt */
13894 NYI_assert (31, 19, 0x1AA1);
13895
13896 /* FIXME: For now we just silently accept system ops. */
13897 }
13898
13899 static void
13900 dexSystem (sim_cpu *cpu)
13901 {
13902 /* instr[31:22] = 1101 01010 0
13903 instr[21] = L
13904 instr[20,19] = op0
13905 instr[18,16] = op1
13906 instr[15,12] = CRn
13907 instr[11,8] = CRm
13908 instr[7,5] = op2
13909 instr[4,0] = uimm5 */
13910
13911 /* We are interested in HINT, DSB, DMB and ISB
13912
13913 Hint #0 encodes NOOP (this is the only hint we care about)
13914 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13915 CRm:op2 == 0000 000 (NOP itself) OR CRm:op2 > 0000 101 (hints treated as NOP)
13916
13917 DSB, DMB, ISB are data synchronization barrier, data memory
13918 barrier and instruction synchronization barrier, respectively, where
13919
13920 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13921 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13922 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13923 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13924 10 ==> InnerShareable, 11 ==> FullSystem
13925 types : 01 ==> Reads, 10 ==> Writes,
13926 11 ==> All, 00 ==> All (domain == FullSystem). */
13927
13928 unsigned rt = INSTR (4, 0);
13929
13930 NYI_assert (31, 22, 0x354);
13931
13932 switch (INSTR (21, 12))
13933 {
13934 case 0x032:
13935 if (rt == 0x1F)
13936 {
13937 /* NOP has CRm != 0000 OR
13938 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13939 uint32_t crm = INSTR (11, 8);
13940 uint32_t op2 = INSTR (7, 5);
13941
13942 if (crm != 0 || (op2 == 0 || op2 > 5))
13943 {
13944 /* Actually call nop method so we can reimplement it later. */
13945 nop (cpu);
13946 return;
13947 }
13948 }
13949 HALT_NYI;
13950
13951 case 0x033:
13952 {
13953 uint32_t op2 = INSTR (7, 5);
13954
13955 switch (op2)
13956 {
13957 case 2: HALT_NYI;
13958 case 4: dsb (cpu); return;
13959 case 5: dmb (cpu); return;
13960 case 6: isb (cpu); return;
13961 default: HALT_UNALLOC;
13962 }
13963 }
13964
13965 case 0x3B0:
13966 case 0x3B4:
13967 case 0x3BD:
13968 do_mrs (cpu);
13969 return;
13970
13971 case 0x0B7:
13972 do_SYS (cpu); /* DC is an alias of SYS. */
13973 return;
13974
13975 default:
13976 if (INSTR (21, 20) == 0x1)
13977 do_MSR_reg (cpu);
13978 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13979 do_MSR_immediate (cpu);
13980 else
13981 HALT_NYI;
13982 return;
13983 }
13984 }
13985
13986 static void
13987 dexBr (sim_cpu *cpu)
13988 {
13989 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13990 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13991 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13992 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13993
13994 switch (group2)
13995 {
13996 case BR_IMM_000:
13997 return dexBranchImmediate (cpu);
13998
13999 case BR_IMMCMP_001:
14000 /* Compare has bit 25 clear while test has it set. */
14001 if (!INSTR (25, 25))
14002 dexCompareBranchImmediate (cpu);
14003 else
14004 dexTestBranchImmediate (cpu);
14005 return;
14006
14007 case BR_IMMCOND_010:
14008 /* This is a conditional branch if bit 25 is clear otherwise
14009 unallocated. */
14010 if (!INSTR (25, 25))
14011 dexCondBranchImmediate (cpu);
14012 else
14013 HALT_UNALLOC;
14014 return;
14015
14016 case BR_UNALLOC_011:
14017 HALT_UNALLOC;
14018
14019 case BR_IMM_100:
14020 dexBranchImmediate (cpu);
14021 return;
14022
14023 case BR_IMMCMP_101:
14024 /* Compare has bit 25 clear while test has it set. */
14025 if (!INSTR (25, 25))
14026 dexCompareBranchImmediate (cpu);
14027 else
14028 dexTestBranchImmediate (cpu);
14029 return;
14030
14031 case BR_REG_110:
14032 /* Unconditional branch reg has bit 25 set. */
14033 if (INSTR (25, 25))
14034 dexBranchRegister (cpu);
14035
14036 /* This includes Excpn Gen, System and unalloc operations.
14037 We need to decode the Excpn Gen operation BRK so we can plant
14038 debugger entry points.
14039 Excpn Gen operations have instr [24] = 0.
14040 We need to decode at least one of the System operations, NOP,
14041 which is an alias for HINT #0.
14042 System operations have instr [24,22] = 100. */
14043 else if (INSTR (24, 24) == 0)
14044 dexExcpnGen (cpu);
14045
14046 else if (INSTR (24, 22) == 4)
14047 dexSystem (cpu);
14048
14049 else
14050 HALT_UNALLOC;
14051
14052 return;
14053
14054 case BR_UNALLOC_111:
14055 HALT_UNALLOC;
14056
14057 default:
14058 /* Should never reach here. */
14059 HALT_NYI;
14060 }
14061 }
14062
14063 static void
14064 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14065 {
14066 /* We need to check if gdb wants a break in here. */
14067 /* checkBreak (cpu);. */
14068
14069 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14070
14071 switch (group)
14072 {
14073 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14074 case GROUP_LDST_0100: dexLdSt (cpu); break;
14075 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14076 case GROUP_LDST_0110: dexLdSt (cpu); break;
14077 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14078 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14079 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14080 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14081 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14082 case GROUP_LDST_1100: dexLdSt (cpu); break;
14083 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14084 case GROUP_LDST_1110: dexLdSt (cpu); break;
14085 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14086
14087 case GROUP_UNALLOC_0001:
14088 case GROUP_UNALLOC_0010:
14089 case GROUP_UNALLOC_0011:
14090 HALT_UNALLOC;
14091
14092 default:
14093 /* Should never reach here. */
14094 HALT_NYI;
14095 }
14096 }
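
/* For example, ADD X0, X0, #0 assembles to 0x91000000; its bits
   [28,25] are 1000, matching GROUP_DPIMM_1000, so dispatchGroup
   routes the instruction to dexDPImm. */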
14097
14098 static bfd_boolean
14099 aarch64_step (sim_cpu *cpu)
14100 {
14101 uint64_t pc = aarch64_get_PC (cpu);
14102
14103 if (pc == TOP_LEVEL_RETURN_PC)
14104 return FALSE;
14105
14106 aarch64_set_next_PC (cpu, pc + 4);
14107
14108 /* Code is always little-endian. */
14109 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14110 & aarch64_get_instr (cpu), pc, 4);
14111 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14112
14113 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14114 aarch64_get_instr (cpu));
14115 TRACE_DISASM (cpu, pc);
14116
14117 aarch64_decode_and_execute (cpu, pc);
14118
14119 return TRUE;
14120 }
14121
14122 void
14123 aarch64_run (SIM_DESC sd)
14124 {
14125 sim_cpu *cpu = STATE_CPU (sd, 0);
14126
14127 while (aarch64_step (cpu))
14128 {
14129 aarch64_update_PC (cpu);
14130
14131 if (sim_events_tick (sd))
14132 sim_events_process (sd);
14133 }
14134
14135 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14136 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14137 }
14138
14139 void
14140 aarch64_init (sim_cpu *cpu, uint64_t pc)
14141 {
14142 uint64_t sp = aarch64_get_stack_start (cpu);
14143
14144 /* Install SP, FP and PC and set LR to -20
14145 so we can detect a top-level return. */
14146 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14147 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14148 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14149 aarch64_set_next_PC (cpu, pc);
14150 aarch64_update_PC (cpu);
14151 aarch64_init_LIT_table ();
14152 }