/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2017 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        sim_io_eprintf (CPU_STATE (cpu), \
                        "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expand_logical_immediate.  */

/* Return a value with bits [N-1,0] set to 1 and all other bits zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
}

/* Return bit N of VAL as the least significant bit of the result.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value consists of S+1 bits set to 1, rotated left by
     simd_size - R (in other words, rotated right by R), then replicated
     across the register.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits set to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
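  /* N.B. each case below deliberately falls through to the next,
     doubling the pattern until all 64 bits are filled.  For example
     S=0, R=0 with simd_size == 2 gives imm == 1 at this point, which
     replicates to 0x5555555555555555 (alternating ones).  */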
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm;
    case  4: imm = (imm <<  4) | imm;
    case  8: imm = (imm <<  8) | imm;
    case 16: imm = (imm << 16) | imm;
    case 32: imm = (imm << 32) | imm;
    case 64: break;
    default: return 0;
    }

  return imm;
}

/* Instr[22,10] encodes N immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of entries.  */
#define LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
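
/* A sketch of the intended use (hypothetical caller): a logical
   immediate decoder indexes the table with instr[22,10] and treats a
   zero entry as an unallocated encoding:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;  */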

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
                                    aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
                                      aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
                                aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}
/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size in bits:
   16, 32, 64 or 128.  The third argument is either Scaled or
   Unscaled.  N.B. when _Scaling is Scaled the offset is shifted;
   when it is Unscaled it is used as-is.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

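/* For example, a register index of 2 used in a Scaled access to a
   64 bit element becomes a byte displacement of 16, while the
   Unscaled form leaves it as 2:

     OPT_SCALE (2, 64, Scaled)   == 2 << ScaleShift64   (== 16)
     OPT_SCALE (2, 64, Unscaled) == 2

   (This assumes ScaleShift64 == 3, i.e. log2 of the element size in
   bytes, and Unscaled == 0, as relied upon by the ternary above.)  */
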
/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}

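/* For example (the two behaviours side by side):

     extend (0xffffffff, UXTW) == 0x00000000ffffffff
     extend (0xffffffff, SXTW) == 0xffffffffffffffff  (i.e. -1)  */
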
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to scale them according to the size of the
   data being accessed.  The same applies to the register and extended
   register offset versions, except that the latter may also require
   a sign extension.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive...  */
}
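
/* N.B. the simulator is single threaded and does not model the
   exclusive monitor: LDXR behaves as a plain load and STXR always
   succeeds, writing 0 (success) as the store status into Rs.  */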

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
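
/* For example, a 64 bit LDR (literal) encodes opc == 01 with V == 0,
   giving dispatch == 2 and hence ldr_pcrel, while the FReg variant
   FLDRD (opc == 01, V == 1) gives dispatch == 3 and fldrd_pcrel.  */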

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

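/* Set the NZCV flags for a 32 bit add by recomputing the sum at 64
   bits: C is set when the unsigned 64 bit sum does not fit back into
   32 bits, V when the signed 64 bit sum does not.  */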
static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

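/* N.B. the NEG and POS macros read a variable named `signbit' from
   the enclosing scope; each of the set_flags_for_* functions below
   defines one of the appropriate width (1U << 31 or 1ULL << 63).  */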
static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

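/* The binop flag setters below only compute N and Z; C and V are
   left clear, matching the architected behaviour of the flag setting
   logical instructions (ANDS, BICS) which set C and V to zero.  */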
1740 static void
1741 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1742 {
1743 uint32_t flags = 0;
1744
1745 if (result == 0)
1746 flags |= Z;
1747 else
1748 flags &= ~ Z;
1749
1750 if (result & (1 << 31))
1751 flags |= N;
1752 else
1753 flags &= ~ N;
1754
1755 aarch64_set_CPSR (cpu, flags);
1756 }
1757
1758 static void
1759 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1760 {
1761 uint32_t flags = 0;
1762
1763 if (result == 0)
1764 flags |= Z;
1765 else
1766 flags &= ~ Z;
1767
1768 if (result & (1ULL << 63))
1769 flags |= N;
1770 else
1771 flags &= ~ N;
1772
1773 aarch64_set_CPSR (cpu, flags);
1774 }
1775
1776 /* 32 bit add immediate set flags. */
1777 static void
1778 adds32 (sim_cpu *cpu, uint32_t aimm)
1779 {
1780 unsigned rn = INSTR (9, 5);
1781 unsigned rd = INSTR (4, 0);
1782 /* TODO : do we need to worry about signs here? */
1783 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1784
1785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1786 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1787 set_flags_for_add32 (cpu, value1, aimm);
1788 }
1789
1790 /* 64 bit add immediate set flags. */
1791 static void
1792 adds64 (sim_cpu *cpu, uint32_t aimm)
1793 {
1794 unsigned rn = INSTR (9, 5);
1795 unsigned rd = INSTR (4, 0);
1796 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1797 uint64_t value2 = aimm;
1798
1799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1800 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1801 set_flags_for_add64 (cpu, value1, value2);
1802 }
1803
1804 /* 32 bit sub immediate. */
1805 static void
1806 sub32 (sim_cpu *cpu, uint32_t aimm)
1807 {
1808 unsigned rn = INSTR (9, 5);
1809 unsigned rd = INSTR (4, 0);
1810
1811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1812 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1813 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1814 }
1815
1816 /* 64 bit sub immediate. */
1817 static void
1818 sub64 (sim_cpu *cpu, uint32_t aimm)
1819 {
1820 unsigned rn = INSTR (9, 5);
1821 unsigned rd = INSTR (4, 0);
1822
1823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1824 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1825 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1826 }
1827
1828 /* 32 bit sub immediate set flags. */
1829 static void
1830 subs32 (sim_cpu *cpu, uint32_t aimm)
1831 {
1832 unsigned rn = INSTR (9, 5);
1833 unsigned rd = INSTR (4, 0);
1834 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1835 uint32_t value2 = aimm;
1836
1837 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1838 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1839 set_flags_for_sub32 (cpu, value1, value2);
1840 }
1841
1842 /* 64 bit sub immediate set flags. */
1843 static void
1844 subs64 (sim_cpu *cpu, uint32_t aimm)
1845 {
1846 unsigned rn = INSTR (9, 5);
1847 unsigned rd = INSTR (4, 0);
1848 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1849 uint32_t value2 = aimm;
1850
1851 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1852 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1853 set_flags_for_sub64 (cpu, value1, value2);
1854 }
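
/* Example (sketch): because subs32/subs64 write Rd with NO_SP, an
   encoding with Rd == 31 discards the result into the zero register;
   that is exactly how the CMP alias reaches these routines:

     CMP X1, #4   ==   SUBS XZR, X1, #4   -- only the flags survive.  */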
1855
1856 /* Data Processing Register. */
1857
1858 /* First two helpers to perform the shift operations. */
1859
1860 static inline uint32_t
1861 shifted32 (uint32_t value, Shift shift, uint32_t count)
1862 {
1863 switch (shift)
1864 {
1865 default:
1866 case LSL:
1867 return (value << count);
1868 case LSR:
1869 return (value >> count);
1870 case ASR:
1871 {
1872 int32_t svalue = value;
1873 return (svalue >> count);
1874 }
1875 case ROR:
1876 {
1877 uint32_t top = value >> count;
1878 uint32_t bottom = value << (32 - count);
1879 return (bottom | top);
1880 }
1881 }
1882 }
1883
1884 static inline uint64_t
1885 shifted64 (uint64_t value, Shift shift, uint32_t count)
1886 {
1887 switch (shift)
1888 {
1889 default:
1890 case LSL:
1891 return (value << count);
1892 case LSR:
1893 return (value >> count);
1894 case ASR:
1895 {
1896 int64_t svalue = value;
1897 return (svalue >> count);
1898 }
1899 case ROR:
1900 {
1901 uint64_t top = value >> count;
1902 uint64_t bottom = value << (64 - count);
1903 return (bottom | top);
1904 }
1905 }
1906 }
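
/* Illustrative values for the helpers above (a sketch, assuming a
   shift count in 1..63; a ROR count of 0 would make the bottom-half
   computation shift by the register width, which is undefined in C):

     shifted64 (0x8000000000000001ULL, LSR, 1) => 0x4000000000000000
     shifted64 (0x8000000000000001ULL, ASR, 1) => 0xc000000000000000
     shifted64 (0x8000000000000001ULL, ROR, 1) => 0xc000000000000000  */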
1907
1908 /* Arithmetic shifted register.
1909 These allow an optional LSL, ASR or LSR to the second source
1910 register with a count up to the register bit count.
1911
1912    N.B. register args may not be SP. */
1913
1914 /* 32 bit ADD shifted register. */
1915 static void
1916 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1917 {
1918 unsigned rm = INSTR (20, 16);
1919 unsigned rn = INSTR (9, 5);
1920 unsigned rd = INSTR (4, 0);
1921
1922 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1923 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1924 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1925 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1926 shift, count));
1927 }
1928
1929 /* 64 bit ADD shifted register. */
1930 static void
1931 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1932 {
1933 unsigned rm = INSTR (20, 16);
1934 unsigned rn = INSTR (9, 5);
1935 unsigned rd = INSTR (4, 0);
1936
1937 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1938 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1939 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1940 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1941 shift, count));
1942 }
1943
1944 /* 32 bit ADD shifted register setting flags. */
1945 static void
1946 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1947 {
1948 unsigned rm = INSTR (20, 16);
1949 unsigned rn = INSTR (9, 5);
1950 unsigned rd = INSTR (4, 0);
1951
1952 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1953 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1954 shift, count);
1955
1956 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1957 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1958 set_flags_for_add32 (cpu, value1, value2);
1959 }
1960
1961 /* 64 bit ADD shifted register setting flags. */
1962 static void
1963 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1964 {
1965 unsigned rm = INSTR (20, 16);
1966 unsigned rn = INSTR (9, 5);
1967 unsigned rd = INSTR (4, 0);
1968
1969 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1970 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1971 shift, count);
1972
1973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1974 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1975 set_flags_for_add64 (cpu, value1, value2);
1976 }
1977
1978 /* 32 bit SUB shifted register. */
1979 static void
1980 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1981 {
1982 unsigned rm = INSTR (20, 16);
1983 unsigned rn = INSTR (9, 5);
1984 unsigned rd = INSTR (4, 0);
1985
1986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1987 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1988 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1989 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1990 shift, count));
1991 }
1992
1993 /* 64 bit SUB shifted register. */
1994 static void
1995 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1996 {
1997 unsigned rm = INSTR (20, 16);
1998 unsigned rn = INSTR (9, 5);
1999 unsigned rd = INSTR (4, 0);
2000
2001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2003 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2004 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2005 shift, count));
2006 }
2007
2008 /* 32 bit SUB shifted register setting flags. */
2009 static void
2010 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2011 {
2012 unsigned rm = INSTR (20, 16);
2013 unsigned rn = INSTR (9, 5);
2014 unsigned rd = INSTR (4, 0);
2015
2016 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2017 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2018 shift, count);
2019
2020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2021 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2022 set_flags_for_sub32 (cpu, value1, value2);
2023 }
2024
2025 /* 64 bit SUB shifted register setting flags. */
2026 static void
2027 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2028 {
2029 unsigned rm = INSTR (20, 16);
2030 unsigned rn = INSTR (9, 5);
2031 unsigned rd = INSTR (4, 0);
2032
2033 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2034 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2035 shift, count);
2036
2037 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2038 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2039 set_flags_for_sub64 (cpu, value1, value2);
2040 }
2041
2042 /* First a couple more helpers to fetch the
2043 relevant source register element either
2044 sign or zero extended as required by the
2045 extension value. */
2046
2047 static uint32_t
2048 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2049 {
2050 switch (extension)
2051 {
2052 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2053 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2054 case UXTW: /* Fall through. */
2055 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2056 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2057 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2058 case SXTW: /* Fall through. */
2059 case SXTX: /* Fall through. */
2060 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2061 }
2062 }
2063
2064 static uint64_t
2065 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2066 {
2067 switch (extension)
2068 {
2069 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2070 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2071 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2072 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2073 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2074 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2075 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2076 case SXTX:
2077 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2078 }
2079 }
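
/* Worked example (sketch): if register 2 holds 0x0000000000000080 then

     extreg64 (cpu, 2, UXTB) => 0x0000000000000080
     extreg64 (cpu, 2, SXTB) => 0xffffffffffffff80

   i.e. the extension decides how the byte/half/word fragment is
   widened before the caller applies its optional left shift.  */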
2080
2081 /* Arithmetic extending register.
2082    These allow an optional sign or zero extension of some portion of
2083    the second source register followed by an optional left shift of
2084    between 0 and 4 bits.
2085
2086    N.B. output (dest) and first input arg (source) may normally be Xn
2087    or SP. However, for flag setting operations dest can only be
2088    Xn. Second input registers are always Xn. */
2089
2090 /* 32 bit ADD extending register. */
2091 static void
2092 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2093 {
2094 unsigned rm = INSTR (20, 16);
2095 unsigned rn = INSTR (9, 5);
2096 unsigned rd = INSTR (4, 0);
2097
2098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2099 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2100 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2101 + (extreg32 (cpu, rm, extension) << shift));
2102 }
2103
2104 /* 64 bit ADD extending register.
2105 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2106 static void
2107 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2108 {
2109 unsigned rm = INSTR (20, 16);
2110 unsigned rn = INSTR (9, 5);
2111 unsigned rd = INSTR (4, 0);
2112
2113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2114 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2115 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2116 + (extreg64 (cpu, rm, extension) << shift));
2117 }
2118
2119 /* 32 bit ADD extending register setting flags. */
2120 static void
2121 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2122 {
2123 unsigned rm = INSTR (20, 16);
2124 unsigned rn = INSTR (9, 5);
2125 unsigned rd = INSTR (4, 0);
2126
2127 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2128 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2129
2130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2131 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2132 set_flags_for_add32 (cpu, value1, value2);
2133 }
2134
2135 /* 64 bit ADD extending register setting flags */
2136 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2137 static void
2138 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2139 {
2140 unsigned rm = INSTR (20, 16);
2141 unsigned rn = INSTR (9, 5);
2142 unsigned rd = INSTR (4, 0);
2143
2144 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2145 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2146
2147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2148 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2149 set_flags_for_add64 (cpu, value1, value2);
2150 }
2151
2152 /* 32 bit SUB extending register. */
2153 static void
2154 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2155 {
2156 unsigned rm = INSTR (20, 16);
2157 unsigned rn = INSTR (9, 5);
2158 unsigned rd = INSTR (4, 0);
2159
2160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2161 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2162 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2163 - (extreg32 (cpu, rm, extension) << shift));
2164 }
2165
2166 /* 64 bit SUB extending register. */
2167 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2168 static void
2169 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2170 {
2171 unsigned rm = INSTR (20, 16);
2172 unsigned rn = INSTR (9, 5);
2173 unsigned rd = INSTR (4, 0);
2174
2175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2176 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2177 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2178 - (extreg64 (cpu, rm, extension) << shift));
2179 }
2180
2181 /* 32 bit SUB extending register setting flags. */
2182 static void
2183 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2184 {
2185 unsigned rm = INSTR (20, 16);
2186 unsigned rn = INSTR (9, 5);
2187 unsigned rd = INSTR (4, 0);
2188
2189 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2190 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2191
2192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2193 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2194 set_flags_for_sub32 (cpu, value1, value2);
2195 }
2196
2197 /* 64 bit SUB extending register setting flags */
2198 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2199 static void
2200 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2201 {
2202 unsigned rm = INSTR (20, 16);
2203 unsigned rn = INSTR (9, 5);
2204 unsigned rd = INSTR (4, 0);
2205
2206 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2207 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2208
2209 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2210 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2211 set_flags_for_sub64 (cpu, value1, value2);
2212 }
2213
2214 static void
2215 dexAddSubtractImmediate (sim_cpu *cpu)
2216 {
2217 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2218 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2219 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2220 instr[28,24] = 10001
2221      instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2222 instr[21,10] = uimm12
2223 instr[9,5] = Rn
2224 instr[4,0] = Rd */
2225
2226 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2227 uint32_t shift = INSTR (23, 22);
2228 uint32_t imm = INSTR (21, 10);
2229 uint32_t dispatch = INSTR (31, 29);
2230
2231 NYI_assert (28, 24, 0x11);
2232
2233 if (shift > 1)
2234 HALT_UNALLOC;
2235
2236 if (shift)
2237 imm <<= 12;
2238
2239 switch (dispatch)
2240 {
2241 case 0: add32 (cpu, imm); break;
2242 case 1: adds32 (cpu, imm); break;
2243 case 2: sub32 (cpu, imm); break;
2244 case 3: subs32 (cpu, imm); break;
2245 case 4: add64 (cpu, imm); break;
2246 case 5: adds64 (cpu, imm); break;
2247 case 6: sub64 (cpu, imm); break;
2248 case 7: subs64 (cpu, imm); break;
2249 }
2250 }
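
/* Decode sketch for one concrete (hypothetical) encoding: 0x11001020
   is ADD W0, W1, #4 --

     instr[31,29] = 000 => dispatch case 0 (32 bit ADD, no flags)
     instr[23,22] = 00  => no LSL #12 of the immediate
     instr[21,10] = 4   => aimm
     instr[9,5] = 1, instr[4,0] = 0

   so add32 (cpu, 4) writes W1 + 4 to W0 and leaves the flags alone.  */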
2251
2252 static void
2253 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2254 {
2255 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2256 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2257 instr[28,24] = 01011
2258 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2259 instr[21] = 0
2260 instr[20,16] = Rm
2261 instr[15,10] = count : must be 0xxxxx for 32 bit
2262 instr[9,5] = Rn
2263 instr[4,0] = Rd */
2264
2265 uint32_t size = INSTR (31, 31);
2266 uint32_t count = INSTR (15, 10);
2267 Shift shiftType = INSTR (23, 22);
2268
2269 NYI_assert (28, 24, 0x0B);
2270 NYI_assert (21, 21, 0);
2271
2272 /* Shift encoded as ROR is unallocated. */
2273 if (shiftType == ROR)
2274 HALT_UNALLOC;
2275
2276 /* 32 bit operations must have count[5] = 0
2277 or else we have an UNALLOC. */
2278 if (size == 0 && uimm (count, 5, 5))
2279 HALT_UNALLOC;
2280
2281 /* Dispatch on size:op i.e instr [31,29]. */
2282 switch (INSTR (31, 29))
2283 {
2284 case 0: add32_shift (cpu, shiftType, count); break;
2285 case 1: adds32_shift (cpu, shiftType, count); break;
2286 case 2: sub32_shift (cpu, shiftType, count); break;
2287 case 3: subs32_shift (cpu, shiftType, count); break;
2288 case 4: add64_shift (cpu, shiftType, count); break;
2289 case 5: adds64_shift (cpu, shiftType, count); break;
2290 case 6: sub64_shift (cpu, shiftType, count); break;
2291 case 7: subs64_shift (cpu, shiftType, count); break;
2292 }
2293 }
2294
2295 static void
2296 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2297 {
2298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2299 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2300 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2301 instr[28,24] = 01011
2302 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2303 instr[21] = 1
2304 instr[20,16] = Rm
2305      instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2306                              010 ==> UXTW, 011 ==> LSL|UXTX,
2307                              100 ==> SXTB, 101 ==> SXTH,
2308                              110 ==> SXTW, 111 ==> SXTX,
2309 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2310 instr[9,5] = Rn
2311 instr[4,0] = Rd */
2312
2313 Extension extensionType = INSTR (15, 13);
2314 uint32_t shift = INSTR (12, 10);
2315
2316 NYI_assert (28, 24, 0x0B);
2317 NYI_assert (21, 21, 1);
2318
2319 /* Shift may not exceed 4. */
2320 if (shift > 4)
2321 HALT_UNALLOC;
2322
2323 /* Dispatch on size:op:set?. */
2324 switch (INSTR (31, 29))
2325 {
2326 case 0: add32_ext (cpu, extensionType, shift); break;
2327 case 1: adds32_ext (cpu, extensionType, shift); break;
2328 case 2: sub32_ext (cpu, extensionType, shift); break;
2329 case 3: subs32_ext (cpu, extensionType, shift); break;
2330 case 4: add64_ext (cpu, extensionType, shift); break;
2331 case 5: adds64_ext (cpu, extensionType, shift); break;
2332 case 6: sub64_ext (cpu, extensionType, shift); break;
2333 case 7: subs64_ext (cpu, extensionType, shift); break;
2334 }
2335 }
2336
2337 /* Conditional data processing
2338 Condition register is implicit 3rd source. */
2339
2340 /* 32 bit add with carry. */
2341 /* N.B. register args may not be SP. */
2342
2343 static void
2344 adc32 (sim_cpu *cpu)
2345 {
2346 unsigned rm = INSTR (20, 16);
2347 unsigned rn = INSTR (9, 5);
2348 unsigned rd = INSTR (4, 0);
2349
2350 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2351 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2352 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2353 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2354 + IS_SET (C));
2355 }
2356
2357 /* 64 bit add with carry */
2358 static void
2359 adc64 (sim_cpu *cpu)
2360 {
2361 unsigned rm = INSTR (20, 16);
2362 unsigned rn = INSTR (9, 5);
2363 unsigned rd = INSTR (4, 0);
2364
2365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2367 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2368 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2369 + IS_SET (C));
2370 }
2371
2372 /* 32 bit add with carry setting flags. */
2373 static void
2374 adcs32 (sim_cpu *cpu)
2375 {
2376 unsigned rm = INSTR (20, 16);
2377 unsigned rn = INSTR (9, 5);
2378 unsigned rd = INSTR (4, 0);
2379
2380 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2381 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2382 uint32_t carry = IS_SET (C);
2383
2384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2385 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2386 set_flags_for_add32 (cpu, value1, value2 + carry);
2387 }
2388
2389 /* 64 bit add with carry setting flags. */
2390 static void
2391 adcs64 (sim_cpu *cpu)
2392 {
2393 unsigned rm = INSTR (20, 16);
2394 unsigned rn = INSTR (9, 5);
2395 unsigned rd = INSTR (4, 0);
2396
2397 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2398 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2399 uint64_t carry = IS_SET (C);
2400
2401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2402 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2403 set_flags_for_add64 (cpu, value1, value2 + carry);
2404 }
2405
2406 /* 32 bit sub with carry. */
2407 static void
2408 sbc32 (sim_cpu *cpu)
2409 {
2410 unsigned rm = INSTR (20, 16);
2411 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2412 unsigned rd = INSTR (4, 0);
2413
2414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2415 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2416 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2417 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2418 - 1 + IS_SET (C));
2419 }
2420
2421 /* 64 bit sub with carry */
2422 static void
2423 sbc64 (sim_cpu *cpu)
2424 {
2425 unsigned rm = INSTR (20, 16);
2426 unsigned rn = INSTR (9, 5);
2427 unsigned rd = INSTR (4, 0);
2428
2429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2430 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2431 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2432 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2433 - 1 + IS_SET (C));
2434 }
2435
2436 /* 32 bit sub with carry setting flags */
2437 static void
2438 sbcs32 (sim_cpu *cpu)
2439 {
2440 unsigned rm = INSTR (20, 16);
2441 unsigned rn = INSTR (9, 5);
2442 unsigned rd = INSTR (4, 0);
2443
2444 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2445 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2446 uint32_t carry = IS_SET (C);
2447   uint32_t result = value1 - value2 - 1 + carry;
2448
2449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2450 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2451 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2452 }
2453
2454 /* 64 bit sub with carry setting flags */
2455 static void
2456 sbcs64 (sim_cpu *cpu)
2457 {
2458 unsigned rm = INSTR (20, 16);
2459 unsigned rn = INSTR (9, 5);
2460 unsigned rd = INSTR (4, 0);
2461
2462 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2463 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2464 uint64_t carry = IS_SET (C);
2465   uint64_t result = value1 - value2 - 1 + carry;
2466
2467 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2468 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2469 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2470 }
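
/* SBCS sketch: with C clear the effective operation is Rn - Rm - 1,
   e.g. value1 == 10, value2 == 3, carry == 0 gives result 6 and the
   flags of the subtraction 10 - 4.  */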
2471
2472 static void
2473 dexAddSubtractWithCarry (sim_cpu *cpu)
2474 {
2475 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2476 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2477 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2478 instr[28,21] = 1 1010 000
2479 instr[20,16] = Rm
2480      instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2481 instr[9,5] = Rn
2482 instr[4,0] = Rd */
2483
2484 uint32_t op2 = INSTR (15, 10);
2485
2486 NYI_assert (28, 21, 0xD0);
2487
2488 if (op2 != 0)
2489 HALT_UNALLOC;
2490
2491 /* Dispatch on size:op:set?. */
2492 switch (INSTR (31, 29))
2493 {
2494 case 0: adc32 (cpu); break;
2495 case 1: adcs32 (cpu); break;
2496 case 2: sbc32 (cpu); break;
2497 case 3: sbcs32 (cpu); break;
2498 case 4: adc64 (cpu); break;
2499 case 5: adcs64 (cpu); break;
2500 case 6: sbc64 (cpu); break;
2501 case 7: sbcs64 (cpu); break;
2502 }
2503 }
2504
2505 static uint32_t
2506 testConditionCode (sim_cpu *cpu, CondCode cc)
2507 {
2508   /* This should be reducible to branchless logic
2509 by some careful testing of bits in CC followed
2510 by the requisite masking and combining of bits
2511 from the flag register.
2512
2513 For now we do it with a switch. */
2514 int res;
2515
2516 switch (cc)
2517 {
2518 case EQ: res = IS_SET (Z); break;
2519 case NE: res = IS_CLEAR (Z); break;
2520 case CS: res = IS_SET (C); break;
2521 case CC: res = IS_CLEAR (C); break;
2522 case MI: res = IS_SET (N); break;
2523 case PL: res = IS_CLEAR (N); break;
2524 case VS: res = IS_SET (V); break;
2525 case VC: res = IS_CLEAR (V); break;
2526 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2527 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2528 case GE: res = IS_SET (N) == IS_SET (V); break;
2529 case LT: res = IS_SET (N) != IS_SET (V); break;
2530 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2531 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2532 case AL:
2533 case NV:
2534 default:
2535 res = 1;
2536 break;
2537 }
2538 return res;
2539 }
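
/* Example (sketch): after a compare that leaves two equal operands,
   Z is set and N == V, so

     testConditionCode (cpu, EQ) => 1
     testConditionCode (cpu, GE) => 1
     testConditionCode (cpu, GT) => 0   (Z rules out "greater").  */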
2540
2541 static void
2542 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2543 {
2544 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2545 instr[30] = compare with positive (1) or negative value (0)
2546 instr[29,21] = 1 1101 0010
2547 instr[20,16] = Rm or const
2548 instr[15,12] = cond
2549 instr[11] = compare reg (0) or const (1)
2550 instr[10] = 0
2551 instr[9,5] = Rn
2552 instr[4] = 0
2553 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2554 signed int negate;
2555 unsigned rm;
2556 unsigned rn;
2557
2558 NYI_assert (29, 21, 0x1d2);
2559 NYI_assert (10, 10, 0);
2560 NYI_assert (4, 4, 0);
2561
2562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2563 if (! testConditionCode (cpu, INSTR (15, 12)))
2564 {
2565 aarch64_set_CPSR (cpu, INSTR (3, 0));
2566 return;
2567 }
2568
2569 negate = INSTR (30, 30) ? 1 : -1;
2570 rm = INSTR (20, 16);
2571 rn = INSTR ( 9, 5);
2572
2573 if (INSTR (31, 31))
2574 {
2575 if (INSTR (11, 11))
2576 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2577 negate * (uint64_t) rm);
2578 else
2579 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2580 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2581 }
2582 else
2583 {
2584 if (INSTR (11, 11))
2585 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2586 negate * rm);
2587 else
2588 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2589 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2590 }
2591 }
2592
2593 static void
2594 do_vec_MOV_whole_vector (sim_cpu *cpu)
2595 {
2596 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2597
2598 instr[31] = 0
2599 instr[30] = half(0)/full(1)
2600 instr[29,21] = 001110101
2601 instr[20,16] = Vs
2602 instr[15,10] = 000111
2603 instr[9,5] = Vs
2604 instr[4,0] = Vd */
2605
2606 unsigned vs = INSTR (9, 5);
2607 unsigned vd = INSTR (4, 0);
2608
2609 NYI_assert (29, 21, 0x075);
2610 NYI_assert (15, 10, 0x07);
2611
2612 if (INSTR (20, 16) != vs)
2613 HALT_NYI;
2614
2615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2616 if (INSTR (30, 30))
2617 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2618
2619 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2620 }
2621
2622 static void
2623 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2624 {
2625 /* instr[31] = 0
2626 instr[30] = word(0)/long(1)
2627 instr[29,21] = 00 1110 000
2628 instr[20,16] = element size and index
2629 instr[15,10] = 00 0010 11
2630 instr[9,5] = V source
2631 instr[4,0] = R dest */
2632
2633 unsigned vs = INSTR (9, 5);
2634 unsigned rd = INSTR (4, 0);
2635 unsigned imm5 = INSTR (20, 16);
2636 unsigned full = INSTR (30, 30);
2637 int size, index;
2638
2639 NYI_assert (29, 21, 0x070);
2640 NYI_assert (15, 10, 0x0B);
2641
2642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2643
2644 if (imm5 & 0x1)
2645 {
2646 size = 0;
2647 index = (imm5 >> 1) & 0xF;
2648 }
2649 else if (imm5 & 0x2)
2650 {
2651 size = 1;
2652 index = (imm5 >> 2) & 0x7;
2653 }
2654 else if (full && (imm5 & 0x4))
2655 {
2656 size = 2;
2657 index = (imm5 >> 3) & 0x3;
2658 }
2659 else
2660 HALT_UNALLOC;
2661
2662 switch (size)
2663 {
2664 case 0:
2665 if (full)
2666 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2667 aarch64_get_vec_s8 (cpu, vs, index));
2668 else
2669 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2670 aarch64_get_vec_s8 (cpu, vs, index));
2671 break;
2672
2673 case 1:
2674 if (full)
2675 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2676 aarch64_get_vec_s16 (cpu, vs, index));
2677 else
2678 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2679 aarch64_get_vec_s16 (cpu, vs, index));
2680 break;
2681
2682 case 2:
2683 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2684 aarch64_get_vec_s32 (cpu, vs, index));
2685 break;
2686
2687 default:
2688 HALT_UNALLOC;
2689 }
2690 }
2691
2692 static void
2693 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2694 {
2695 /* instr[31] = 0
2696 instr[30] = word(0)/long(1)
2697 instr[29,21] = 00 1110 000
2698 instr[20,16] = element size and index
2699 instr[15,10] = 00 0011 11
2700 instr[9,5] = V source
2701 instr[4,0] = R dest */
2702
2703 unsigned vs = INSTR (9, 5);
2704 unsigned rd = INSTR (4, 0);
2705 unsigned imm5 = INSTR (20, 16);
2706 unsigned full = INSTR (30, 30);
2707 int size, index;
2708
2709 NYI_assert (29, 21, 0x070);
2710 NYI_assert (15, 10, 0x0F);
2711
2712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2713
2714 if (!full)
2715 {
2716 if (imm5 & 0x1)
2717 {
2718 size = 0;
2719 index = (imm5 >> 1) & 0xF;
2720 }
2721 else if (imm5 & 0x2)
2722 {
2723 size = 1;
2724 index = (imm5 >> 2) & 0x7;
2725 }
2726 else if (imm5 & 0x4)
2727 {
2728 size = 2;
2729 index = (imm5 >> 3) & 0x3;
2730 }
2731 else
2732 HALT_UNALLOC;
2733 }
2734 else if (imm5 & 0x8)
2735 {
2736 size = 3;
2737 index = (imm5 >> 4) & 0x1;
2738 }
2739 else
2740 HALT_UNALLOC;
2741
2742 switch (size)
2743 {
2744 case 0:
2745 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2746 aarch64_get_vec_u8 (cpu, vs, index));
2747 break;
2748
2749 case 1:
2750 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2751 aarch64_get_vec_u16 (cpu, vs, index));
2752 break;
2753
2754 case 2:
2755 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2756 aarch64_get_vec_u32 (cpu, vs, index));
2757 break;
2758
2759 case 3:
2760 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2761 aarch64_get_vec_u64 (cpu, vs, index));
2762 break;
2763
2764 default:
2765 HALT_UNALLOC;
2766 }
2767 }
2768
2769 static void
2770 do_vec_INS (sim_cpu *cpu)
2771 {
2772 /* instr[31,21] = 01001110000
2773 instr[20,16] = element size and index
2774 instr[15,10] = 000111
2775 instr[9,5] = W source
2776 instr[4,0] = V dest */
2777
2778 int index;
2779 unsigned rs = INSTR (9, 5);
2780 unsigned vd = INSTR (4, 0);
2781
2782 NYI_assert (31, 21, 0x270);
2783 NYI_assert (15, 10, 0x07);
2784
2785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2786 if (INSTR (16, 16))
2787 {
2788 index = INSTR (20, 17);
2789 aarch64_set_vec_u8 (cpu, vd, index,
2790 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2791 }
2792 else if (INSTR (17, 17))
2793 {
2794 index = INSTR (20, 18);
2795 aarch64_set_vec_u16 (cpu, vd, index,
2796 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2797 }
2798 else if (INSTR (18, 18))
2799 {
2800 index = INSTR (20, 19);
2801 aarch64_set_vec_u32 (cpu, vd, index,
2802 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2803 }
2804 else if (INSTR (19, 19))
2805 {
2806 index = INSTR (20, 20);
2807 aarch64_set_vec_u64 (cpu, vd, index,
2808 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2809 }
2810 else
2811 HALT_NYI;
2812 }
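
/* The lowest set bit of imm5 selects the element size and the bits
   above it hold the index, mirroring the architectural encoding.
   Sketch for INS Vd.S[1], Wn: imm5 = 0b01100, so bit 2 selects
   32-bit elements and index = INSTR (20, 19) = 1.  */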
2813
2814 static void
2815 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2816 {
2817 /* instr[31] = 0
2818 instr[30] = half(0)/full(1)
2819 instr[29,21] = 00 1110 000
2820 instr[20,16] = element size and index
2821 instr[15,10] = 0000 01
2822 instr[9,5] = V source
2823 instr[4,0] = V dest. */
2824
2825 unsigned full = INSTR (30, 30);
2826 unsigned vs = INSTR (9, 5);
2827 unsigned vd = INSTR (4, 0);
2828 int i, index;
2829
2830 NYI_assert (29, 21, 0x070);
2831 NYI_assert (15, 10, 0x01);
2832
2833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2834 if (INSTR (16, 16))
2835 {
2836 index = INSTR (20, 17);
2837
2838 for (i = 0; i < (full ? 16 : 8); i++)
2839 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2840 }
2841 else if (INSTR (17, 17))
2842 {
2843 index = INSTR (20, 18);
2844
2845 for (i = 0; i < (full ? 8 : 4); i++)
2846 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2847 }
2848 else if (INSTR (18, 18))
2849 {
2850 index = INSTR (20, 19);
2851
2852 for (i = 0; i < (full ? 4 : 2); i++)
2853 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2854 }
2855 else
2856 {
2857 if (INSTR (19, 19) == 0)
2858 HALT_UNALLOC;
2859
2860 if (! full)
2861 HALT_UNALLOC;
2862
2863 index = INSTR (20, 20);
2864
2865 for (i = 0; i < 2; i++)
2866 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2867 }
2868 }
2869
2870 static void
2871 do_vec_TBL (sim_cpu *cpu)
2872 {
2873 /* instr[31] = 0
2874 instr[30] = half(0)/full(1)
2875 instr[29,21] = 00 1110 000
2876 instr[20,16] = Vm
2877 instr[15] = 0
2878 instr[14,13] = vec length
2879 instr[12,10] = 000
2880 instr[9,5] = V start
2881 instr[4,0] = V dest */
2882
2883 int full = INSTR (30, 30);
2884 int len = INSTR (14, 13) + 1;
2885 unsigned vm = INSTR (20, 16);
2886 unsigned vn = INSTR (9, 5);
2887 unsigned vd = INSTR (4, 0);
2888 unsigned i;
2889
2890 NYI_assert (29, 21, 0x070);
2891 NYI_assert (12, 10, 0);
2892
2893 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2894 for (i = 0; i < (full ? 16 : 8); i++)
2895 {
2896 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2897 uint8_t val;
2898
2899 if (selector < 16)
2900 val = aarch64_get_vec_u8 (cpu, vn, selector);
2901 else if (selector < 32)
2902 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2903 else if (selector < 48)
2904 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2905 else if (selector < 64)
2906 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2907 else
2908 val = 0;
2909
2910 aarch64_set_vec_u8 (cpu, vd, i, val);
2911 }
2912 }
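
/* TBL worked example (sketch): with len == 1 and Vn holding bytes
   { 0x10, 0x11, ..., 0x1f }, a selector byte of 0x03 in Vm produces
   0x13, while any selector >= 16 produces 0x00 -- out-of-range
   indices read as zero rather than faulting.  */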
2913
2914 static void
2915 do_vec_TRN (sim_cpu *cpu)
2916 {
2917 /* instr[31] = 0
2918 instr[30] = half(0)/full(1)
2919 instr[29,24] = 00 1110
2920 instr[23,22] = size
2921 instr[21] = 0
2922 instr[20,16] = Vm
2923 instr[15] = 0
2924 instr[14] = TRN1 (0) / TRN2 (1)
2925 instr[13,10] = 1010
2926 instr[9,5] = V source
2927 instr[4,0] = V dest. */
2928
2929 int full = INSTR (30, 30);
2930 int second = INSTR (14, 14);
2931 unsigned vm = INSTR (20, 16);
2932 unsigned vn = INSTR (9, 5);
2933 unsigned vd = INSTR (4, 0);
2934 unsigned i;
2935
2936 NYI_assert (29, 24, 0x0E);
2937 NYI_assert (13, 10, 0xA);
2938
2939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2940 switch (INSTR (23, 22))
2941 {
2942 case 0:
2943 for (i = 0; i < (full ? 8 : 4); i++)
2944 {
2945 aarch64_set_vec_u8
2946 	    (cpu, vd, i * 2,
2947 	     aarch64_get_vec_u8 (cpu, vn, i * 2 + second));
2948 	  aarch64_set_vec_u8
2949 	    (cpu, vd, i * 2 + 1,
2950 	     aarch64_get_vec_u8 (cpu, vm, i * 2 + second));
2951 }
2952 break;
2953
2954 case 1:
2955 for (i = 0; i < (full ? 4 : 2); i++)
2956 {
2957 aarch64_set_vec_u16
2958 	    (cpu, vd, i * 2,
2959 	     aarch64_get_vec_u16 (cpu, vn, i * 2 + second));
2960 	  aarch64_set_vec_u16
2961 	    (cpu, vd, i * 2 + 1,
2962 	     aarch64_get_vec_u16 (cpu, vm, i * 2 + second));
2963 }
2964 break;
2965
2966 case 2:
2967 aarch64_set_vec_u32
2968 	(cpu, vd, 0, aarch64_get_vec_u32 (cpu, vn, 0 + second));
2969       aarch64_set_vec_u32
2970 	(cpu, vd, 1, aarch64_get_vec_u32 (cpu, vm, 0 + second));
2971       aarch64_set_vec_u32
2972 	(cpu, vd, 2, aarch64_get_vec_u32 (cpu, vn, 2 + second));
2973       aarch64_set_vec_u32
2974 	(cpu, vd, 3, aarch64_get_vec_u32 (cpu, vm, 2 + second));
2975 break;
2976
2977 case 3:
2978 if (! full)
2979 HALT_UNALLOC;
2980
2981       aarch64_set_vec_u64 (cpu, vd, 0,
2982 			   aarch64_get_vec_u64 (cpu, vn, second));
2983       aarch64_set_vec_u64 (cpu, vd, 1,
2984 			   aarch64_get_vec_u64 (cpu, vm, second));
2985 break;
2986 }
2987 }
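
/* TRN worked example (sketch), byte case: TRN1 (second == 0) gives
   Vd = { Vn[0], Vm[0], Vn[2], Vm[2], ... } and TRN2 (second == 1)
   gives Vd = { Vn[1], Vm[1], Vn[3], Vm[3], ... }.  */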
2988
2989 static void
2990 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2991 {
2992 /* instr[31] = 0
2993 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2994 [must be 1 for 64-bit xfer]
2995 instr[29,20] = 00 1110 0000
2996 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2997                            0100=> 32-bits, 1000=> 64-bits
2998 instr[15,10] = 0000 11
2999 instr[9,5] = W source
3000 instr[4,0] = V dest. */
3001
3002 unsigned i;
3003 unsigned Vd = INSTR (4, 0);
3004 unsigned Rs = INSTR (9, 5);
3005 int both = INSTR (30, 30);
3006
3007 NYI_assert (29, 20, 0x0E0);
3008 NYI_assert (15, 10, 0x03);
3009
3010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3011 switch (INSTR (19, 16))
3012 {
3013 case 1:
3014 for (i = 0; i < (both ? 16 : 8); i++)
3015 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3016 break;
3017
3018 case 2:
3019 for (i = 0; i < (both ? 8 : 4); i++)
3020 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3021 break;
3022
3023 case 4:
3024 for (i = 0; i < (both ? 4 : 2); i++)
3025 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3026 break;
3027
3028 case 8:
3029 if (!both)
3030 HALT_NYI;
3031 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3032 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3033 break;
3034
3035 default:
3036 HALT_NYI;
3037 }
3038 }
3039
3040 static void
3041 do_vec_UZP (sim_cpu *cpu)
3042 {
3043 /* instr[31] = 0
3044 instr[30] = half(0)/full(1)
3045 instr[29,24] = 00 1110
3046 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3047 instr[21] = 0
3048 instr[20,16] = Vm
3049 instr[15] = 0
3050 instr[14] = lower (0) / upper (1)
3051 instr[13,10] = 0110
3052 instr[9,5] = Vn
3053 instr[4,0] = Vd. */
3054
3055 int full = INSTR (30, 30);
3056 int upper = INSTR (14, 14);
3057
3058 unsigned vm = INSTR (20, 16);
3059 unsigned vn = INSTR (9, 5);
3060 unsigned vd = INSTR (4, 0);
3061
3062 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3063 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3064 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3065 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3066
3067 uint64_t val1;
3068 uint64_t val2;
3069
3070 uint64_t input2 = full ? val_n2 : val_m1;
3071
3072 NYI_assert (29, 24, 0x0E);
3073 NYI_assert (21, 21, 0);
3074 NYI_assert (15, 15, 0);
3075 NYI_assert (13, 10, 6);
3076
3077 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3078 switch (INSTR (23, 22))
3079 {
3080 case 0:
3081 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3082 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3083 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3084 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3085
3086 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3087 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3088 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3089 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3090
3091 if (full)
3092 {
3093 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3094 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3095 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3096 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3097
3098 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3099 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3100 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3101 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3102 }
3103 break;
3104
3105 case 1:
3106 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3107 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3108
3109       val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3110 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3111
3112 if (full)
3113 {
3114 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3115 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3116
3117 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3118 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3119 }
3120 break;
3121
3122 case 2:
3123 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3124 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3125
3126 if (full)
3127 {
3128 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3129 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3130 }
3131 break;
3132
3133 case 3:
3134 if (! full)
3135 HALT_UNALLOC;
3136
3137 val1 = upper ? val_n2 : val_n1;
3138 val2 = upper ? val_m2 : val_m1;
3139 break;
3140 }
3141
3142 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3143 if (full)
3144 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3145 }
3146
3147 static void
3148 do_vec_ZIP (sim_cpu *cpu)
3149 {
3150 /* instr[31] = 0
3151 instr[30] = half(0)/full(1)
3152 instr[29,24] = 00 1110
3153      instr[23,22] = size: byte(00), half(01), word (10), long (11)
3154 instr[21] = 0
3155 instr[20,16] = Vm
3156 instr[15] = 0
3157 instr[14] = lower (0) / upper (1)
3158 instr[13,10] = 1110
3159 instr[9,5] = Vn
3160 instr[4,0] = Vd. */
3161
3162 int full = INSTR (30, 30);
3163 int upper = INSTR (14, 14);
3164
3165 unsigned vm = INSTR (20, 16);
3166 unsigned vn = INSTR (9, 5);
3167 unsigned vd = INSTR (4, 0);
3168
3169 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3170 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3171 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3172 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3173
3174 uint64_t val1 = 0;
3175 uint64_t val2 = 0;
3176
3177   uint64_t input1 = upper ? val_n2 : val_n1;
3178   uint64_t input2 = upper ? val_m2 : val_m1;
3179
3180 NYI_assert (29, 24, 0x0E);
3181 NYI_assert (21, 21, 0);
3182 NYI_assert (15, 15, 0);
3183 NYI_assert (13, 10, 0xE);
3184
3185 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3186   switch (INSTR (23, 22))
3187 {
3188 case 0:
3189 val1 =
3190 ((input1 << 0) & (0xFF << 0))
3191 | ((input2 << 8) & (0xFF << 8))
3192 | ((input1 << 8) & (0xFF << 16))
3193 | ((input2 << 16) & (0xFF << 24))
3194 | ((input1 << 16) & (0xFFULL << 32))
3195 | ((input2 << 24) & (0xFFULL << 40))
3196 | ((input1 << 24) & (0xFFULL << 48))
3197 | ((input2 << 32) & (0xFFULL << 56));
3198
3199 val2 =
3200 ((input1 >> 32) & (0xFF << 0))
3201 | ((input2 >> 24) & (0xFF << 8))
3202 | ((input1 >> 24) & (0xFF << 16))
3203 | ((input2 >> 16) & (0xFF << 24))
3204 | ((input1 >> 16) & (0xFFULL << 32))
3205 | ((input2 >> 8) & (0xFFULL << 40))
3206 | ((input1 >> 8) & (0xFFULL << 48))
3207 | ((input2 >> 0) & (0xFFULL << 56));
3208 break;
3209
3210 case 1:
3211 val1 =
3212 ((input1 << 0) & (0xFFFF << 0))
3213 | ((input2 << 16) & (0xFFFF << 16))
3214 | ((input1 << 16) & (0xFFFFULL << 32))
3215 | ((input2 << 32) & (0xFFFFULL << 48));
3216
3217 val2 =
3218 ((input1 >> 32) & (0xFFFF << 0))
3219 | ((input2 >> 16) & (0xFFFF << 16))
3220 | ((input1 >> 16) & (0xFFFFULL << 32))
3221 | ((input2 >> 0) & (0xFFFFULL << 48));
3222 break;
3223
3224 case 2:
3225 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3226       val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3227 break;
3228
3229 case 3:
3230 val1 = input1;
3231 val2 = input2;
3232 break;
3233 }
3234
3235 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3236 if (full)
3237 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3238 }
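
/* ZIP worked example (sketch), 32-bit full-width case: ZIP1 gives
   Vd = { Vn[0], Vm[0], Vn[1], Vm[1] } and ZIP2 the same pattern over
   elements 2 and 3 of each source.  */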
3239
3240 /* Floating point immediates are encoded in 8 bits.
3241 fpimm[7] = sign bit.
3242 fpimm[6:4] = signed exponent.
3243 fpimm[3:0] = fraction (assuming leading 1).
3244 i.e. F = s * 1.f * 2^(e - b). */
3245
3246 static float
3247 fp_immediate_for_encoding_32 (uint32_t imm8)
3248 {
3249 float u;
3250 uint32_t s, e, f, i;
3251
3252 s = (imm8 >> 7) & 0x1;
3253 e = (imm8 >> 4) & 0x7;
3254 f = imm8 & 0xf;
3255
3256   /* The fp value is (-1)^s * (16 + f)/16 * 2^exp,
3257      where exp = e + 1 for e < 4 and exp = e - 7 otherwise.  */
3257 u = (16.0 + f) / 16.0;
3258
3259 /* N.B. exponent is signed. */
3260 if (e < 4)
3261 {
3262 int epos = e;
3263
3264 for (i = 0; i <= epos; i++)
3265 u *= 2.0;
3266 }
3267 else
3268 {
3269 int eneg = 7 - e;
3270
3271 for (i = 0; i < eneg; i++)
3272 u /= 2.0;
3273 }
3274
3275 if (s)
3276 u = - u;
3277
3278 return u;
3279 }
3280
3281 static double
3282 fp_immediate_for_encoding_64 (uint32_t imm8)
3283 {
3284 double u;
3285 uint32_t s, e, f, i;
3286
3287 s = (imm8 >> 7) & 0x1;
3288 e = (imm8 >> 4) & 0x7;
3289 f = imm8 & 0xf;
3290
3291   /* The fp value is (-1)^s * (16 + f)/16 * 2^exp,
3292      where exp = e + 1 for e < 4 and exp = e - 7 otherwise.  */
3292 u = (16.0 + f) / 16.0;
3293
3294 /* N.B. exponent is signed. */
3295 if (e < 4)
3296 {
3297 int epos = e;
3298
3299 for (i = 0; i <= epos; i++)
3300 u *= 2.0;
3301 }
3302 else
3303 {
3304 int eneg = 7 - e;
3305
3306 for (i = 0; i < eneg; i++)
3307 u /= 2.0;
3308 }
3309
3310 if (s)
3311 u = - u;
3312
3313 return u;
3314 }
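
/* Worked example (sketch): the canonical encoding of 1.0 is
   imm8 == 0x70, i.e. s = 0, e = 7, f = 0:

     u = (16 + 0) / 16 = 1.0 and eneg = 7 - 7 = 0 halvings

   so fp_immediate_for_encoding_64 (0x70) => 1.0, and 0xf0 (sign bit
   set) => -1.0.  */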
3315
3316 static void
3317 do_vec_MOV_immediate (sim_cpu *cpu)
3318 {
3319 /* instr[31] = 0
3320 instr[30] = full/half selector
3321 instr[29,19] = 00111100000
3322 instr[18,16] = high 3 bits of uimm8
3323 instr[15,12] = size & shift:
3324 0000 => 32-bit
3325 0010 => 32-bit + LSL#8
3326 0100 => 32-bit + LSL#16
3327 0110 => 32-bit + LSL#24
3328 1010 => 16-bit + LSL#8
3329 1000 => 16-bit
3330 1101 => 32-bit + MSL#16
3331 1100 => 32-bit + MSL#8
3332 1110 => 8-bit
3333 1111 => double
3334 instr[11,10] = 01
3335 instr[9,5] = low 5-bits of uimm8
3336 instr[4,0] = Vd. */
3337
3338 int full = INSTR (30, 30);
3339 unsigned vd = INSTR (4, 0);
3340 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3341 unsigned i;
3342
3343 NYI_assert (29, 19, 0x1E0);
3344 NYI_assert (11, 10, 1);
3345
3346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3347 switch (INSTR (15, 12))
3348 {
3349 case 0x0: /* 32-bit, no shift. */
3350 case 0x2: /* 32-bit, shift by 8. */
3351 case 0x4: /* 32-bit, shift by 16. */
3352 case 0x6: /* 32-bit, shift by 24. */
3353 val <<= (8 * INSTR (14, 13));
3354 for (i = 0; i < (full ? 4 : 2); i++)
3355 aarch64_set_vec_u32 (cpu, vd, i, val);
3356 break;
3357
3358 case 0xa: /* 16-bit, shift by 8. */
3359 val <<= 8;
3360 /* Fall through. */
3361 case 0x8: /* 16-bit, no shift. */
3362 for (i = 0; i < (full ? 8 : 4); i++)
3363 aarch64_set_vec_u16 (cpu, vd, i, val);
3364 break;
3365
3366 case 0xd: /* 32-bit, mask shift by 16. */
3367 val <<= 8;
3368 val |= 0xFF;
3369 /* Fall through. */
3370 case 0xc: /* 32-bit, mask shift by 8. */
3371 val <<= 8;
3372 val |= 0xFF;
3373 for (i = 0; i < (full ? 4 : 2); i++)
3374 aarch64_set_vec_u32 (cpu, vd, i, val);
3375 break;
3376
3377 case 0xe: /* 8-bit, no shift. */
3378 for (i = 0; i < (full ? 16 : 8); i++)
3379 aarch64_set_vec_u8 (cpu, vd, i, val);
3380 break;
3381
3382 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3383 {
3384 float u = fp_immediate_for_encoding_32 (val);
3385 for (i = 0; i < (full ? 4 : 2); i++)
3386 aarch64_set_vec_float (cpu, vd, i, u);
3387 break;
3388 }
3389
3390 default:
3391 HALT_NYI;
3392 }
3393 }
3394
3395 static void
3396 do_vec_MVNI (sim_cpu *cpu)
3397 {
3398 /* instr[31] = 0
3399 instr[30] = full/half selector
3400 instr[29,19] = 10111100000
3401 instr[18,16] = high 3 bits of uimm8
3402 instr[15,12] = selector
3403 instr[11,10] = 01
3404 instr[9,5] = low 5-bits of uimm8
3405 instr[4,0] = Vd. */
3406
3407 int full = INSTR (30, 30);
3408 unsigned vd = INSTR (4, 0);
3409 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3410 unsigned i;
3411
3412 NYI_assert (29, 19, 0x5E0);
3413 NYI_assert (11, 10, 1);
3414
3415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3416 switch (INSTR (15, 12))
3417 {
3418 case 0x0: /* 32-bit, no shift. */
3419 case 0x2: /* 32-bit, shift by 8. */
3420 case 0x4: /* 32-bit, shift by 16. */
3421 case 0x6: /* 32-bit, shift by 24. */
3422 val <<= (8 * INSTR (14, 13));
3423 val = ~ val;
3424 for (i = 0; i < (full ? 4 : 2); i++)
3425 aarch64_set_vec_u32 (cpu, vd, i, val);
3426 return;
3427
3428 case 0xa: /* 16-bit, 8 bit shift. */
3429 val <<= 8;
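      /* Fall through. */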
3430 case 0x8: /* 16-bit, no shift. */
3431 val = ~ val;
3432 for (i = 0; i < (full ? 8 : 4); i++)
3433 aarch64_set_vec_u16 (cpu, vd, i, val);
3434 return;
3435
3436 case 0xd: /* 32-bit, mask shift by 16. */
3437 val <<= 8;
3438 val |= 0xFF;
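      /* Fall through. */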
3439 case 0xc: /* 32-bit, mask shift by 8. */
3440 val <<= 8;
3441 val |= 0xFF;
3442 val = ~ val;
3443 for (i = 0; i < (full ? 4 : 2); i++)
3444 aarch64_set_vec_u32 (cpu, vd, i, val);
3445 return;
3446
3447     case 0xe: /* MOVI Dn, #mask64 */
3448 {
3449 uint64_t mask = 0;
3450
3451 for (i = 0; i < 8; i++)
3452 if (val & (1 << i))
3453 mask |= (0xFFUL << (i * 8));
3454 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3455 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3456 return;
3457 }
3458
3459 case 0xf: /* FMOV Vd.2D, #fpimm. */
3460 {
3461 double u = fp_immediate_for_encoding_64 (val);
3462
3463 if (! full)
3464 HALT_UNALLOC;
3465
3466 aarch64_set_vec_double (cpu, vd, 0, u);
3467 aarch64_set_vec_double (cpu, vd, 1, u);
3468 return;
3469 }
3470
3471 default:
3472 HALT_NYI;
3473 }
3474 }
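
/* MOVI #mask64 sketch (case 0xe above): each uimm8 bit selects a
   whole byte of ones, so val == 0x0f expands to the 64-bit mask
   0x00000000ffffffff, written to both halves of Vd.  */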
3475
3476 #define ABS(A) ((A) < 0 ? - (A) : (A))
3477
3478 static void
3479 do_vec_ABS (sim_cpu *cpu)
3480 {
3481 /* instr[31] = 0
3482 instr[30] = half(0)/full(1)
3483 instr[29,24] = 00 1110
3484 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3485 instr[21,10] = 10 0000 1011 10
3486 instr[9,5] = Vn
3487      instr[4,0] = Vd. */
3488
3489 unsigned vn = INSTR (9, 5);
3490 unsigned vd = INSTR (4, 0);
3491 unsigned full = INSTR (30, 30);
3492 unsigned i;
3493
3494 NYI_assert (29, 24, 0x0E);
3495 NYI_assert (21, 10, 0x82E);
3496
3497 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3498 switch (INSTR (23, 22))
3499 {
3500 case 0:
3501 for (i = 0; i < (full ? 16 : 8); i++)
3502 aarch64_set_vec_s8 (cpu, vd, i,
3503 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3504 break;
3505
3506 case 1:
3507 for (i = 0; i < (full ? 8 : 4); i++)
3508 aarch64_set_vec_s16 (cpu, vd, i,
3509 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3510 break;
3511
3512 case 2:
3513 for (i = 0; i < (full ? 4 : 2); i++)
3514 aarch64_set_vec_s32 (cpu, vd, i,
3515 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3516 break;
3517
3518 case 3:
3519 if (! full)
3520 HALT_NYI;
3521 for (i = 0; i < 2; i++)
3522 aarch64_set_vec_s64 (cpu, vd, i,
3523 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3524 break;
3525 }
3526 }
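
/* N.B. ABS here is plain two's-complement negation, so the most
   negative lane value maps to itself (an 8-bit lane holding -128
   stays 0x80); vector ABS does not saturate -- that is SQABS.  */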
3527
3528 static void
3529 do_vec_ADDV (sim_cpu *cpu)
3530 {
3531 /* instr[31] = 0
3532 instr[30] = full/half selector
3533 instr[29,24] = 00 1110
3534 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3535 instr[21,10] = 11 0001 1011 10
3536 instr[9,5] = Vm
3537      instr[4,0] = Rd. */
3538
3539 unsigned vm = INSTR (9, 5);
3540 unsigned rd = INSTR (4, 0);
3541 unsigned i;
3542 int full = INSTR (30, 30);
3543
3544 NYI_assert (29, 24, 0x0E);
3545 NYI_assert (21, 10, 0xC6E);
3546
3547 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3548 switch (INSTR (23, 22))
3549 {
3550 case 0:
3551 {
3552 uint8_t val = 0;
3553 for (i = 0; i < (full ? 16 : 8); i++)
3554 val += aarch64_get_vec_u8 (cpu, vm, i);
3555 aarch64_set_vec_u64 (cpu, rd, 0, val);
3556 return;
3557 }
3558
3559 case 1:
3560 {
3561 uint16_t val = 0;
3562 for (i = 0; i < (full ? 8 : 4); i++)
3563 val += aarch64_get_vec_u16 (cpu, vm, i);
3564 aarch64_set_vec_u64 (cpu, rd, 0, val);
3565 return;
3566 }
3567
3568 case 2:
3569 {
3570 uint32_t val = 0;
3571 if (! full)
3572 HALT_UNALLOC;
3573 for (i = 0; i < 4; i++)
3574 val += aarch64_get_vec_u32 (cpu, vm, i);
3575 aarch64_set_vec_u64 (cpu, rd, 0, val);
3576 return;
3577 }
3578
3579 case 3:
3580 HALT_UNALLOC;
3581 }
3582 }
3583
3584 static void
3585 do_vec_ins_2 (sim_cpu *cpu)
3586 {
3587 /* instr[31,21] = 01001110000
3588 instr[20,18] = size & element selector
3589 instr[17,14] = 0000
3590 instr[13] = direction: to vec(0), from vec (1)
3591 instr[12,10] = 111
3592 instr[9,5] = Vm
3593 instr[4,0] = Vd. */
3594
3595 unsigned elem;
3596 unsigned vm = INSTR (9, 5);
3597 unsigned vd = INSTR (4, 0);
3598
3599 NYI_assert (31, 21, 0x270);
3600 NYI_assert (17, 14, 0);
3601 NYI_assert (12, 10, 7);
3602
3603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3604 if (INSTR (13, 13) == 1)
3605 {
3606 if (INSTR (18, 18) == 1)
3607 {
3608 /* 32-bit moves. */
3609 elem = INSTR (20, 19);
3610 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3611 aarch64_get_vec_u32 (cpu, vm, elem));
3612 }
3613 else
3614 {
3615 /* 64-bit moves. */
3616 if (INSTR (19, 19) != 1)
3617 HALT_NYI;
3618
3619 elem = INSTR (20, 20);
3620 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3621 aarch64_get_vec_u64 (cpu, vm, elem));
3622 }
3623 }
3624 else
3625 {
3626 if (INSTR (18, 18) == 1)
3627 {
3628 /* 32-bit moves. */
3629 elem = INSTR (20, 19);
3630 aarch64_set_vec_u32 (cpu, vd, elem,
3631 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3632 }
3633 else
3634 {
3635 /* 64-bit moves. */
3636 if (INSTR (19, 19) != 1)
3637 HALT_NYI;
3638
3639 elem = INSTR (20, 20);
3640 aarch64_set_vec_u64 (cpu, vd, elem,
3641 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3642 }
3643 }
3644 }
3645
3646 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3647 do \
3648 { \
3649 DST_TYPE a[N], b[N]; \
3650 \
3651 for (i = 0; i < (N); i++) \
3652 { \
3653 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3654 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3655 } \
3656 for (i = 0; i < (N); i++) \
3657 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3658 } \
3659 while (0)
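
/* Usage sketch: DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32) stages
   four u16 lanes of vn and vm (offset by `bias' in the *2 upper-half
   forms) into local arrays before any u32 product is written back;
   the staging is what makes vd == vn or vd == vm aliasing safe.  */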
3660
3661 static void
3662 do_vec_mull (sim_cpu *cpu)
3663 {
3664 /* instr[31] = 0
3665 instr[30] = lower(0)/upper(1) selector
3666 instr[29] = signed(0)/unsigned(1)
3667 instr[28,24] = 0 1110
3668 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3669 instr[21] = 1
3670 instr[20,16] = Vm
3671 instr[15,10] = 11 0000
3672 instr[9,5] = Vn
3673      instr[4,0] = Vd. */
3674
3675 int unsign = INSTR (29, 29);
3676 int bias = INSTR (30, 30);
3677 unsigned vm = INSTR (20, 16);
3678 unsigned vn = INSTR ( 9, 5);
3679 unsigned vd = INSTR ( 4, 0);
3680 unsigned i;
3681
3682 NYI_assert (28, 24, 0x0E);
3683 NYI_assert (15, 10, 0x30);
3684
3685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3686 /* NB: Read source values before writing results, in case
3687 the source and destination vectors are the same. */
3688 switch (INSTR (23, 22))
3689 {
3690 case 0:
3691 if (bias)
3692 bias = 8;
3693 if (unsign)
3694 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3695 else
3696 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3697 return;
3698
3699 case 1:
3700 if (bias)
3701 bias = 4;
3702 if (unsign)
3703 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3704 else
3705 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3706 return;
3707
3708 case 2:
3709 if (bias)
3710 bias = 2;
3711 if (unsign)
3712 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3713 else
3714 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3715 return;
3716
3717 case 3:
3718 HALT_NYI;
3719 }
3720 }
3721
3722 static void
3723 do_vec_fadd (sim_cpu *cpu)
3724 {
3725 /* instr[31] = 0
3726 instr[30] = half(0)/full(1)
3727 instr[29,24] = 001110
3728 instr[23] = FADD(0)/FSUB(1)
3729 instr[22] = float (0)/double(1)
3730 instr[21] = 1
3731 instr[20,16] = Vm
3732 instr[15,10] = 110101
3733 instr[9,5] = Vn
3734      instr[4,0] = Vd. */
3735
3736 unsigned vm = INSTR (20, 16);
3737 unsigned vn = INSTR (9, 5);
3738 unsigned vd = INSTR (4, 0);
3739 unsigned i;
3740 int full = INSTR (30, 30);
3741
3742 NYI_assert (29, 24, 0x0E);
3743 NYI_assert (21, 21, 1);
3744 NYI_assert (15, 10, 0x35);
3745
3746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3747 if (INSTR (23, 23))
3748 {
3749 if (INSTR (22, 22))
3750 {
3751 if (! full)
3752 HALT_NYI;
3753
3754 for (i = 0; i < 2; i++)
3755 aarch64_set_vec_double (cpu, vd, i,
3756 aarch64_get_vec_double (cpu, vn, i)
3757 - aarch64_get_vec_double (cpu, vm, i));
3758 }
3759 else
3760 {
3761 for (i = 0; i < (full ? 4 : 2); i++)
3762 aarch64_set_vec_float (cpu, vd, i,
3763 aarch64_get_vec_float (cpu, vn, i)
3764 - aarch64_get_vec_float (cpu, vm, i));
3765 }
3766 }
3767 else
3768 {
3769 if (INSTR (22, 22))
3770 {
3771 if (! full)
3772 HALT_NYI;
3773
3774 for (i = 0; i < 2; i++)
3775 aarch64_set_vec_double (cpu, vd, i,
3776 aarch64_get_vec_double (cpu, vm, i)
3777 + aarch64_get_vec_double (cpu, vn, i));
3778 }
3779 else
3780 {
3781 for (i = 0; i < (full ? 4 : 2); i++)
3782 aarch64_set_vec_float (cpu, vd, i,
3783 aarch64_get_vec_float (cpu, vm, i)
3784 + aarch64_get_vec_float (cpu, vn, i));
3785 }
3786 }
3787 }
3788
3789 static void
3790 do_vec_add (sim_cpu *cpu)
3791 {
3792 /* instr[31] = 0
3793 instr[30] = full/half selector
3794 instr[29,24] = 001110
3795 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3796 instr[21] = 1
3797      instr[20,16] = Vm
3798      instr[15,10] = 100001
3799      instr[9,5] = Vn
3800      instr[4,0] = Vd. */
3801
3802 unsigned vm = INSTR (20, 16);
3803 unsigned vn = INSTR (9, 5);
3804 unsigned vd = INSTR (4, 0);
3805 unsigned i;
3806 int full = INSTR (30, 30);
3807
3808 NYI_assert (29, 24, 0x0E);
3809 NYI_assert (21, 21, 1);
3810 NYI_assert (15, 10, 0x21);
3811
3812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3813 switch (INSTR (23, 22))
3814 {
3815 case 0:
3816 for (i = 0; i < (full ? 16 : 8); i++)
3817 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3818 + aarch64_get_vec_u8 (cpu, vm, i));
3819 return;
3820
3821 case 1:
3822 for (i = 0; i < (full ? 8 : 4); i++)
3823 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3824 + aarch64_get_vec_u16 (cpu, vm, i));
3825 return;
3826
3827 case 2:
3828 for (i = 0; i < (full ? 4 : 2); i++)
3829 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3830 + aarch64_get_vec_u32 (cpu, vm, i));
3831 return;
3832
3833 case 3:
3834 if (! full)
3835 HALT_UNALLOC;
3836 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3837 + aarch64_get_vec_u64 (cpu, vm, 0));
3838 aarch64_set_vec_u64 (cpu, vd, 1,
3839 aarch64_get_vec_u64 (cpu, vn, 1)
3840 + aarch64_get_vec_u64 (cpu, vm, 1));
3841 return;
3842 }
3843 }
3844
3845 static void
3846 do_vec_mul (sim_cpu *cpu)
3847 {
3848 /* instr[31] = 0
3849 instr[30] = full/half selector
3850 instr[29,24] = 00 1110
3851 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3852 instr[21] = 1
3853      instr[20,16] = Vm
3854      instr[15,10] = 10 0111
3855      instr[9,5] = Vn
3856      instr[4,0] = Vd. */
3857
3858 unsigned vm = INSTR (20, 16);
3859 unsigned vn = INSTR (9, 5);
3860 unsigned vd = INSTR (4, 0);
3861 unsigned i;
3862 int full = INSTR (30, 30);
3863 int bias = 0;
3864
3865 NYI_assert (29, 24, 0x0E);
3866 NYI_assert (21, 21, 1);
3867 NYI_assert (15, 10, 0x27);
3868
3869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3870 switch (INSTR (23, 22))
3871 {
3872 case 0:
3873 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3874 return;
3875
3876 case 1:
3877 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3878 return;
3879
3880 case 2:
3881 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3882 return;
3883
3884 case 3:
3885 HALT_UNALLOC;
3886 }
3887 }
3888
3889 static void
3890 do_vec_MLA (sim_cpu *cpu)
3891 {
3892 /* instr[31] = 0
3893 instr[30] = full/half selector
3894 instr[29,24] = 00 1110
3895 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3896 instr[21] = 1
3897      instr[20,16] = Vm
3898      instr[15,10] = 1001 01
3899      instr[9,5] = Vn
3900      instr[4,0] = Vd. */
3901
3902 unsigned vm = INSTR (20, 16);
3903 unsigned vn = INSTR (9, 5);
3904 unsigned vd = INSTR (4, 0);
3905 unsigned i;
3906 int full = INSTR (30, 30);
3907
3908 NYI_assert (29, 24, 0x0E);
3909 NYI_assert (21, 21, 1);
3910 NYI_assert (15, 10, 0x25);
3911
3912 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3913 switch (INSTR (23, 22))
3914 {
3915 case 0:
3916 for (i = 0; i < (full ? 16 : 8); i++)
3917 aarch64_set_vec_u8 (cpu, vd, i,
3918 aarch64_get_vec_u8 (cpu, vd, i)
3919 + (aarch64_get_vec_u8 (cpu, vn, i)
3920 * aarch64_get_vec_u8 (cpu, vm, i)));
3921 return;
3922
3923 case 1:
3924 for (i = 0; i < (full ? 8 : 4); i++)
3925 aarch64_set_vec_u16 (cpu, vd, i,
3926 aarch64_get_vec_u16 (cpu, vd, i)
3927 + (aarch64_get_vec_u16 (cpu, vn, i)
3928 * aarch64_get_vec_u16 (cpu, vm, i)));
3929 return;
3930
3931 case 2:
3932 for (i = 0; i < (full ? 4 : 2); i++)
3933 aarch64_set_vec_u32 (cpu, vd, i,
3934 aarch64_get_vec_u32 (cpu, vd, i)
3935 + (aarch64_get_vec_u32 (cpu, vn, i)
3936 * aarch64_get_vec_u32 (cpu, vm, i)));
3937 return;
3938
3939 default:
3940 HALT_UNALLOC;
3941 }
3942 }
3943
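/* The next four helpers implement the IEEE 754-2008 maxNum/minNum
   semantics used by the FMAXNM/FMINNM family of instructions: a NaN
   operand is treated as missing data, so the numeric operand wins,
   and a NaN is returned only when both operands are NaNs.  For example:

     fmaxnm (NAN, 1.0f) == 1.0f
     fminnm (2.0f, NAN) == 2.0f
     fmaxnm (NAN, NAN)  is NaN.  */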
3944 static float
3945 fmaxnm (float a, float b)
3946 {
3947 if (! isnan (a))
3948 {
3949 if (! isnan (b))
3950 return a > b ? a : b;
3951 return a;
3952 }
3953 else if (! isnan (b))
3954 return b;
3955 return a;
3956 }
3957
3958 static float
3959 fminnm (float a, float b)
3960 {
3961 if (! isnan (a))
3962 {
3963 if (! isnan (b))
3964 return a < b ? a : b;
3965 return a;
3966 }
3967 else if (! isnan (b))
3968 return b;
3969 return a;
3970 }
3971
3972 static double
3973 dmaxnm (double a, double b)
3974 {
3975 if (! isnan (a))
3976 {
3977 if (! isnan (b))
3978 return a > b ? a : b;
3979 return a;
3980 }
3981 else if (! isnan (b))
3982 return b;
3983 return a;
3984 }
3985
3986 static double
3987 dminnm (double a, double b)
3988 {
3989 if (! isnan (a))
3990 {
3991 if (! isnan (b))
3992 return a < b ? a : b;
3993 return a;
3994 }
3995 else if (! isnan (b))
3996 return b;
3997 return a;
3998 }
3999
4000 static void
4001 do_vec_FminmaxNMP (sim_cpu *cpu)
4002 {
4003 /* instr [31] = 0
4004 instr [30] = half (0)/full (1)
4005 instr [29,24] = 10 1110
4006 instr [23] = max(0)/min(1)
4007 instr [22] = float (0)/double (1)
4008 instr [21] = 1
4009     instr [20,16] = Vm
4010     instr [15,10] = 1100 01
4011     instr [9,5]  = Vn
4012     instr [4,0]  = Vd. */
4013
4014 unsigned vm = INSTR (20, 16);
4015 unsigned vn = INSTR (9, 5);
4016 unsigned vd = INSTR (4, 0);
4017 int full = INSTR (30, 30);
4018
4019 NYI_assert (29, 24, 0x2E);
4020 NYI_assert (21, 21, 1);
4021 NYI_assert (15, 10, 0x31);
4022
4023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4024 if (INSTR (22, 22))
4025 {
4026 double (* fn)(double, double) = INSTR (23, 23)
4027 ? dminnm : dmaxnm;
4028
4029 if (! full)
4030 HALT_NYI;
4031 aarch64_set_vec_double (cpu, vd, 0,
4032 fn (aarch64_get_vec_double (cpu, vn, 0),
4033 aarch64_get_vec_double (cpu, vn, 1)));
4034       aarch64_set_vec_double (cpu, vd, 1,
4035 			      fn (aarch64_get_vec_double (cpu, vm, 0),
4036 				  aarch64_get_vec_double (cpu, vm, 1)));
4037 }
4038 else
4039 {
4040 float (* fn)(float, float) = INSTR (23, 23)
4041 ? fminnm : fmaxnm;
4042
4043 aarch64_set_vec_float (cpu, vd, 0,
4044 fn (aarch64_get_vec_float (cpu, vn, 0),
4045 aarch64_get_vec_float (cpu, vn, 1)));
4046 if (full)
4047 aarch64_set_vec_float (cpu, vd, 1,
4048 fn (aarch64_get_vec_float (cpu, vn, 2),
4049 aarch64_get_vec_float (cpu, vn, 3)));
4050
4051 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4052 fn (aarch64_get_vec_float (cpu, vm, 0),
4053 aarch64_get_vec_float (cpu, vm, 1)));
4054 if (full)
4055 aarch64_set_vec_float (cpu, vd, 3,
4056 fn (aarch64_get_vec_float (cpu, vm, 2),
4057 aarch64_get_vec_float (cpu, vm, 3)));
4058 }
4059 }
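
/* Note the pairwise layout used above: the lower half of Vd holds the
   min/max of adjacent pairs taken from Vn, and the upper half holds the
   min/max of adjacent pairs taken from Vm.  */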
4060
4061 static void
4062 do_vec_AND (sim_cpu *cpu)
4063 {
4064 /* instr[31] = 0
4065 instr[30] = half (0)/full (1)
4066 instr[29,21] = 001110001
4067 instr[20,16] = Vm
4068 instr[15,10] = 000111
4069 instr[9,5] = Vn
4070 instr[4,0] = Vd. */
4071
4072 unsigned vm = INSTR (20, 16);
4073 unsigned vn = INSTR (9, 5);
4074 unsigned vd = INSTR (4, 0);
4075 unsigned i;
4076 int full = INSTR (30, 30);
4077
4078 NYI_assert (29, 21, 0x071);
4079 NYI_assert (15, 10, 0x07);
4080
4081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4082 for (i = 0; i < (full ? 4 : 2); i++)
4083 aarch64_set_vec_u32 (cpu, vd, i,
4084 aarch64_get_vec_u32 (cpu, vn, i)
4085 & aarch64_get_vec_u32 (cpu, vm, i));
4086 }
4087
4088 static void
4089 do_vec_BSL (sim_cpu *cpu)
4090 {
4091 /* instr[31] = 0
4092 instr[30] = half (0)/full (1)
4093 instr[29,21] = 101110011
4094 instr[20,16] = Vm
4095 instr[15,10] = 000111
4096 instr[9,5] = Vn
4097 instr[4,0] = Vd. */
4098
4099 unsigned vm = INSTR (20, 16);
4100 unsigned vn = INSTR (9, 5);
4101 unsigned vd = INSTR (4, 0);
4102 unsigned i;
4103 int full = INSTR (30, 30);
4104
4105 NYI_assert (29, 21, 0x173);
4106 NYI_assert (15, 10, 0x07);
4107
4108 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4109 for (i = 0; i < (full ? 16 : 8); i++)
4110 aarch64_set_vec_u8 (cpu, vd, i,
4111 ( aarch64_get_vec_u8 (cpu, vd, i)
4112 & aarch64_get_vec_u8 (cpu, vn, i))
4113 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4114 & aarch64_get_vec_u8 (cpu, vm, i)));
4115 }
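
/* Worked example of the selection above: with vd = 0xF0, vn = 0xAA and
   vm = 0x55, each result byte is (0xF0 & 0xAA) | (~0xF0 & 0x55)
   = 0xA0 | 0x05 = 0xA5; set bits of Vd select Vn, clear bits select Vm.  */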
4116
4117 static void
4118 do_vec_EOR (sim_cpu *cpu)
4119 {
4120 /* instr[31] = 0
4121 instr[30] = half (0)/full (1)
4122 instr[29,21] = 10 1110 001
4123 instr[20,16] = Vm
4124 instr[15,10] = 000111
4125 instr[9,5] = Vn
4126 instr[4,0] = Vd. */
4127
4128 unsigned vm = INSTR (20, 16);
4129 unsigned vn = INSTR (9, 5);
4130 unsigned vd = INSTR (4, 0);
4131 unsigned i;
4132 int full = INSTR (30, 30);
4133
4134 NYI_assert (29, 21, 0x171);
4135 NYI_assert (15, 10, 0x07);
4136
4137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4138 for (i = 0; i < (full ? 4 : 2); i++)
4139 aarch64_set_vec_u32 (cpu, vd, i,
4140 aarch64_get_vec_u32 (cpu, vn, i)
4141 ^ aarch64_get_vec_u32 (cpu, vm, i));
4142 }
4143
4144 static void
4145 do_vec_bit (sim_cpu *cpu)
4146 {
4147 /* instr[31] = 0
4148 instr[30] = half (0)/full (1)
4149 instr[29,23] = 10 1110 1
4150 instr[22] = BIT (0) / BIF (1)
4151 instr[21] = 1
4152 instr[20,16] = Vm
4153 instr[15,10] = 0001 11
4154 instr[9,5] = Vn
4155 instr[4,0] = Vd. */
4156
4157 unsigned vm = INSTR (20, 16);
4158 unsigned vn = INSTR (9, 5);
4159 unsigned vd = INSTR (4, 0);
4160 unsigned full = INSTR (30, 30);
4161 unsigned test_false = INSTR (22, 22);
4162 unsigned i;
4163
4164 NYI_assert (29, 23, 0x5D);
4165 NYI_assert (21, 21, 1);
4166 NYI_assert (15, 10, 0x07);
4167
4168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4169 for (i = 0; i < (full ? 4 : 2); i++)
4170 {
4171 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4172 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4173 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4174 if (test_false)
4175 aarch64_set_vec_u32 (cpu, vd, i,
4176 (vd_val & vm_val) | (vn_val & ~vm_val));
4177 else
4178 aarch64_set_vec_u32 (cpu, vd, i,
4179 (vd_val & ~vm_val) | (vn_val & vm_val));
4180 }
4181 }
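
/* BIT inserts each Vn bit into Vd where the corresponding Vm bit is 1;
   BIF inserts where the Vm bit is 0.  In both cases the remaining Vd
   bits are left unchanged, which is why Vd is read as well as written.  */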
4182
4183 static void
4184 do_vec_ORN (sim_cpu *cpu)
4185 {
4186 /* instr[31] = 0
4187 instr[30] = half (0)/full (1)
4188 instr[29,21] = 00 1110 111
4189 instr[20,16] = Vm
4190 instr[15,10] = 00 0111
4191 instr[9,5] = Vn
4192 instr[4,0] = Vd. */
4193
4194 unsigned vm = INSTR (20, 16);
4195 unsigned vn = INSTR (9, 5);
4196 unsigned vd = INSTR (4, 0);
4197 unsigned i;
4198 int full = INSTR (30, 30);
4199
4200 NYI_assert (29, 21, 0x077);
4201 NYI_assert (15, 10, 0x07);
4202
4203 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4204 for (i = 0; i < (full ? 16 : 8); i++)
4205 aarch64_set_vec_u8 (cpu, vd, i,
4206 aarch64_get_vec_u8 (cpu, vn, i)
4207 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4208 }
4209
4210 static void
4211 do_vec_ORR (sim_cpu *cpu)
4212 {
4213 /* instr[31] = 0
4214 instr[30] = half (0)/full (1)
4215 instr[29,21] = 00 1110 101
4216 instr[20,16] = Vm
4217 instr[15,10] = 0001 11
4218 instr[9,5] = Vn
4219 instr[4,0] = Vd. */
4220
4221 unsigned vm = INSTR (20, 16);
4222 unsigned vn = INSTR (9, 5);
4223 unsigned vd = INSTR (4, 0);
4224 unsigned i;
4225 int full = INSTR (30, 30);
4226
4227 NYI_assert (29, 21, 0x075);
4228 NYI_assert (15, 10, 0x07);
4229
4230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4231 for (i = 0; i < (full ? 16 : 8); i++)
4232 aarch64_set_vec_u8 (cpu, vd, i,
4233 aarch64_get_vec_u8 (cpu, vn, i)
4234 | aarch64_get_vec_u8 (cpu, vm, i));
4235 }
4236
4237 static void
4238 do_vec_BIC (sim_cpu *cpu)
4239 {
4240 /* instr[31] = 0
4241 instr[30] = half (0)/full (1)
4242 instr[29,21] = 00 1110 011
4243 instr[20,16] = Vm
4244 instr[15,10] = 00 0111
4245 instr[9,5] = Vn
4246 instr[4,0] = Vd. */
4247
4248 unsigned vm = INSTR (20, 16);
4249 unsigned vn = INSTR (9, 5);
4250 unsigned vd = INSTR (4, 0);
4251 unsigned i;
4252 int full = INSTR (30, 30);
4253
4254 NYI_assert (29, 21, 0x073);
4255 NYI_assert (15, 10, 0x07);
4256
4257 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4258 for (i = 0; i < (full ? 16 : 8); i++)
4259 aarch64_set_vec_u8 (cpu, vd, i,
4260 aarch64_get_vec_u8 (cpu, vn, i)
4261 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4262 }
4263
4264 static void
4265 do_vec_XTN (sim_cpu *cpu)
4266 {
4267 /* instr[31] = 0
4268 instr[30] = first part (0)/ second part (1)
4269 instr[29,24] = 00 1110
4270 instr[23,22] = size: byte(00), half(01), word (10)
4271 instr[21,10] = 1000 0100 1010
4272 instr[9,5] = Vs
4273 instr[4,0] = Vd. */
4274
4275 unsigned vs = INSTR (9, 5);
4276 unsigned vd = INSTR (4, 0);
4277 unsigned bias = INSTR (30, 30);
4278 unsigned i;
4279
4280 NYI_assert (29, 24, 0x0E);
4281 NYI_assert (21, 10, 0x84A);
4282
4283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4284 switch (INSTR (23, 22))
4285 {
4286 case 0:
4287 for (i = 0; i < 8; i++)
4288 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4289 aarch64_get_vec_u16 (cpu, vs, i));
4290 return;
4291
4292 case 1:
4293 for (i = 0; i < 4; i++)
4294 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4295 aarch64_get_vec_u32 (cpu, vs, i));
4296 return;
4297
4298 case 2:
4299 for (i = 0; i < 2; i++)
4300 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4301 aarch64_get_vec_u64 (cpu, vs, i));
4302 return;
4303 }
4304 }
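
/* XTN narrows each wide source lane to its low half; with bit 30 set
   (XTN2) the narrowed results are written to the upper half of Vd,
   which is what the bias offset above implements.  */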
4305
4306 /* Return the number of bits set in the input value. */
4307 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4308 # define popcount __builtin_popcount
4309 #else
4310 static int
4311 popcount (unsigned char x)
4312 {
4313 static const unsigned char popcnt[16] =
4314 {
4315 0, 1, 1, 2,
4316 1, 2, 2, 3,
4317 1, 2, 2, 3,
4318 2, 3, 3, 4
4319 };
4320
4321 /* Only counts the low 8 bits of the input as that is all we need. */
4322 return popcnt[x % 16] + popcnt[x / 16];
4323 }
4324 #endif
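
/* Worked example: popcount (0xB6) is 5; the table fallback computes it
   as popcnt[0x6] + popcnt[0xB] = 2 + 3.  */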
4325
4326 static void
4327 do_vec_CNT (sim_cpu *cpu)
4328 {
4329 /* instr[31] = 0
4330 instr[30] = half (0)/ full (1)
4331 instr[29,24] = 00 1110
4332 instr[23,22] = size: byte(00)
4333 instr[21,10] = 1000 0001 0110
4334 instr[9,5] = Vs
4335 instr[4,0] = Vd. */
4336
4337 unsigned vs = INSTR (9, 5);
4338 unsigned vd = INSTR (4, 0);
4339 int full = INSTR (30, 30);
4340 int size = INSTR (23, 22);
4341 int i;
4342
4343 NYI_assert (29, 24, 0x0E);
4344 NYI_assert (21, 10, 0x816);
4345
4346 if (size != 0)
4347 HALT_UNALLOC;
4348
4349 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4350
4351 for (i = 0; i < (full ? 16 : 8); i++)
4352 aarch64_set_vec_u8 (cpu, vd, i,
4353 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4354 }
4355
4356 static void
4357 do_vec_maxv (sim_cpu *cpu)
4358 {
4359 /* instr[31] = 0
4360 instr[30] = half(0)/full(1)
4361 instr[29] = signed (0)/unsigned(1)
4362 instr[28,24] = 0 1110
4363 instr[23,22] = size: byte(00), half(01), word (10)
4364 instr[21] = 1
4365 instr[20,17] = 1 000
4366 instr[16] = max(0)/min(1)
4367 instr[15,10] = 1010 10
4368 instr[9,5] = V source
4369 instr[4,0] = R dest. */
4370
4371 unsigned vs = INSTR (9, 5);
4372 unsigned rd = INSTR (4, 0);
4373 unsigned full = INSTR (30, 30);
4374 unsigned i;
4375
4376 NYI_assert (28, 24, 0x0E);
4377 NYI_assert (21, 21, 1);
4378 NYI_assert (20, 17, 8);
4379 NYI_assert (15, 10, 0x2A);
4380
4381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4382 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4383 {
4384 case 0: /* SMAXV. */
4385 {
4386 int64_t smax;
4387 switch (INSTR (23, 22))
4388 {
4389 case 0:
4390 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4391 for (i = 1; i < (full ? 16 : 8); i++)
4392 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4393 break;
4394 case 1:
4395 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4396 for (i = 1; i < (full ? 8 : 4); i++)
4397 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4398 break;
4399 case 2:
4400 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4401 for (i = 1; i < (full ? 4 : 2); i++)
4402 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4403 break;
4404 case 3:
4405 HALT_UNALLOC;
4406 }
4407 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4408 return;
4409 }
4410
4411 case 1: /* SMINV. */
4412 {
4413 int64_t smin;
4414 switch (INSTR (23, 22))
4415 {
4416 case 0:
4417 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4418 for (i = 1; i < (full ? 16 : 8); i++)
4419 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4420 break;
4421 case 1:
4422 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4423 for (i = 1; i < (full ? 8 : 4); i++)
4424 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4425 break;
4426 case 2:
4427 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4428 for (i = 1; i < (full ? 4 : 2); i++)
4429 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4430 break;
4431
4432 case 3:
4433 HALT_UNALLOC;
4434 }
4435 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4436 return;
4437 }
4438
4439 case 2: /* UMAXV. */
4440 {
4441 uint64_t umax;
4442 switch (INSTR (23, 22))
4443 {
4444 case 0:
4445 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4446 for (i = 1; i < (full ? 16 : 8); i++)
4447 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4448 break;
4449 case 1:
4450 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4451 for (i = 1; i < (full ? 8 : 4); i++)
4452 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4453 break;
4454 case 2:
4455 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4456 for (i = 1; i < (full ? 4 : 2); i++)
4457 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4458 break;
4459
4460 case 3:
4461 HALT_UNALLOC;
4462 }
4463 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4464 return;
4465 }
4466
4467 case 3: /* UMINV. */
4468 {
4469 uint64_t umin;
4470 switch (INSTR (23, 22))
4471 {
4472 case 0:
4473 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4474 for (i = 1; i < (full ? 16 : 8); i++)
4475 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4476 break;
4477 case 1:
4478 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4479 for (i = 1; i < (full ? 8 : 4); i++)
4480 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4481 break;
4482 case 2:
4483 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4484 for (i = 1; i < (full ? 4 : 2); i++)
4485 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4486 break;
4487
4488 case 3:
4489 HALT_UNALLOC;
4490 }
4491 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4492 return;
4493 }
4494 }
4495 }
4496
4497 static void
4498 do_vec_fminmaxV (sim_cpu *cpu)
4499 {
4500 /* instr[31,24] = 0110 1110
4501 instr[23] = max(0)/min(1)
4502 instr[22,14] = 011 0000 11
4503 instr[13,12] = nm(00)/normal(11)
4504 instr[11,10] = 10
4505 instr[9,5] = V source
4506 instr[4,0] = R dest. */
4507
4508 unsigned vs = INSTR (9, 5);
4509 unsigned rd = INSTR (4, 0);
4510 unsigned i;
4511 float res = aarch64_get_vec_float (cpu, vs, 0);
4512
4513 NYI_assert (31, 24, 0x6E);
4514 NYI_assert (22, 14, 0x0C3);
4515 NYI_assert (11, 10, 2);
4516
4517 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4518 if (INSTR (23, 23))
4519 {
4520 switch (INSTR (13, 12))
4521 {
4522 	case 0: /* FMINNMV. */
4523 for (i = 1; i < 4; i++)
4524 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4525 break;
4526
4527 case 3: /* FMINV. */
4528 for (i = 1; i < 4; i++)
4529 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4530 break;
4531
4532 default:
4533 HALT_NYI;
4534 }
4535 }
4536 else
4537 {
4538 switch (INSTR (13, 12))
4539 {
4540 	case 0: /* FMAXNMV. */
4541 for (i = 1; i < 4; i++)
4542 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4543 break;
4544
4545 case 3: /* FMAXV. */
4546 for (i = 1; i < 4; i++)
4547 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4548 break;
4549
4550 default:
4551 HALT_NYI;
4552 }
4553 }
4554
4555 aarch64_set_FP_float (cpu, rd, res);
4556 }
4557
4558 static void
4559 do_vec_Fminmax (sim_cpu *cpu)
4560 {
4561 /* instr[31] = 0
4562 instr[30] = half(0)/full(1)
4563 instr[29,24] = 00 1110
4564 instr[23] = max(0)/min(1)
4565 instr[22] = float(0)/double(1)
4566 instr[21] = 1
4567 instr[20,16] = Vm
4568 instr[15,14] = 11
4569 instr[13,12] = nm(00)/normal(11)
4570 instr[11,10] = 01
4571 instr[9,5] = Vn
4572 instr[4,0] = Vd. */
4573
4574 unsigned vm = INSTR (20, 16);
4575 unsigned vn = INSTR (9, 5);
4576 unsigned vd = INSTR (4, 0);
4577 unsigned full = INSTR (30, 30);
4578 unsigned min = INSTR (23, 23);
4579 unsigned i;
4580
4581 NYI_assert (29, 24, 0x0E);
4582 NYI_assert (21, 21, 1);
4583 NYI_assert (15, 14, 3);
4584 NYI_assert (11, 10, 1);
4585
4586 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4587 if (INSTR (22, 22))
4588 {
4589 double (* func)(double, double);
4590
4591 if (! full)
4592 HALT_NYI;
4593
4594 if (INSTR (13, 12) == 0)
4595 func = min ? dminnm : dmaxnm;
4596 else if (INSTR (13, 12) == 3)
4597 func = min ? fmin : fmax;
4598 else
4599 HALT_NYI;
4600
4601 for (i = 0; i < 2; i++)
4602 aarch64_set_vec_double (cpu, vd, i,
4603 func (aarch64_get_vec_double (cpu, vn, i),
4604 aarch64_get_vec_double (cpu, vm, i)));
4605 }
4606 else
4607 {
4608 float (* func)(float, float);
4609
4610 if (INSTR (13, 12) == 0)
4611 func = min ? fminnm : fmaxnm;
4612 else if (INSTR (13, 12) == 3)
4613 func = min ? fminf : fmaxf;
4614 else
4615 HALT_NYI;
4616
4617 for (i = 0; i < (full ? 4 : 2); i++)
4618 aarch64_set_vec_float (cpu, vd, i,
4619 func (aarch64_get_vec_float (cpu, vn, i),
4620 aarch64_get_vec_float (cpu, vm, i)));
4621 }
4622 }
4623
4624 static void
4625 do_vec_SCVTF (sim_cpu *cpu)
4626 {
4627 /* instr[31] = 0
4628 instr[30] = Q
4629 instr[29,23] = 00 1110 0
4630 instr[22] = float(0)/double(1)
4631 instr[21,10] = 10 0001 1101 10
4632 instr[9,5] = Vn
4633 instr[4,0] = Vd. */
4634
4635 unsigned vn = INSTR (9, 5);
4636 unsigned vd = INSTR (4, 0);
4637 unsigned full = INSTR (30, 30);
4638 unsigned size = INSTR (22, 22);
4639 unsigned i;
4640
4641 NYI_assert (29, 23, 0x1C);
4642 NYI_assert (21, 10, 0x876);
4643
4644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4645 if (size)
4646 {
4647 if (! full)
4648 HALT_UNALLOC;
4649
4650 for (i = 0; i < 2; i++)
4651 {
4652 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4653 aarch64_set_vec_double (cpu, vd, i, val);
4654 }
4655 }
4656 else
4657 {
4658 for (i = 0; i < (full ? 4 : 2); i++)
4659 {
4660 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4661 aarch64_set_vec_float (cpu, vd, i, val);
4662 }
4663 }
4664 }
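
/* SCVTF converts signed integer lanes (bit 29 is 0 in this encoding),
   so the lanes above must be read through the signed accessors before
   conversion.  */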
4665
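/* The vector compare macros below produce a per-lane mask rather than a
   boolean: every bit of a result lane is set when the comparison holds
   (hence the -1 stores) and cleared otherwise.  */
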
4666 #define VEC_CMP(SOURCE, CMP) \
4667 do \
4668 { \
4669 switch (size) \
4670 { \
4671 case 0: \
4672 for (i = 0; i < (full ? 16 : 8); i++) \
4673 aarch64_set_vec_u8 (cpu, vd, i, \
4674 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4675 CMP \
4676 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4677 ? -1 : 0); \
4678 return; \
4679 case 1: \
4680 for (i = 0; i < (full ? 8 : 4); i++) \
4681 aarch64_set_vec_u16 (cpu, vd, i, \
4682 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4683 CMP \
4684 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4685 ? -1 : 0); \
4686 return; \
4687 case 2: \
4688 for (i = 0; i < (full ? 4 : 2); i++) \
4689 aarch64_set_vec_u32 (cpu, vd, i, \
4690 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4691 CMP \
4692 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4693 ? -1 : 0); \
4694 return; \
4695 case 3: \
4696 if (! full) \
4697 HALT_UNALLOC; \
4698 for (i = 0; i < 2; i++) \
4699 aarch64_set_vec_u64 (cpu, vd, i, \
4700 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4701 CMP \
4702 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4703 ? -1ULL : 0); \
4704 return; \
4705 } \
4706 } \
4707 while (0)
4708
4709 #define VEC_CMP0(SOURCE, CMP) \
4710 do \
4711 { \
4712 switch (size) \
4713 { \
4714 case 0: \
4715 for (i = 0; i < (full ? 16 : 8); i++) \
4716 aarch64_set_vec_u8 (cpu, vd, i, \
4717 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4718 CMP 0 ? -1 : 0); \
4719 return; \
4720 case 1: \
4721 for (i = 0; i < (full ? 8 : 4); i++) \
4722 aarch64_set_vec_u16 (cpu, vd, i, \
4723 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4724 CMP 0 ? -1 : 0); \
4725 return; \
4726 case 2: \
4727 for (i = 0; i < (full ? 4 : 2); i++) \
4728 aarch64_set_vec_u32 (cpu, vd, i, \
4729 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4730 CMP 0 ? -1 : 0); \
4731 return; \
4732 case 3: \
4733 if (! full) \
4734 HALT_UNALLOC; \
4735 for (i = 0; i < 2; i++) \
4736 aarch64_set_vec_u64 (cpu, vd, i, \
4737 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4738 CMP 0 ? -1ULL : 0); \
4739 return; \
4740 } \
4741 } \
4742 while (0)
4743
4744 #define VEC_FCMP0(CMP) \
4745 do \
4746 { \
4747 if (vm != 0) \
4748 HALT_NYI; \
4749 if (INSTR (22, 22)) \
4750 { \
4751 if (! full) \
4752 HALT_NYI; \
4753 for (i = 0; i < 2; i++) \
4754 aarch64_set_vec_u64 (cpu, vd, i, \
4755 aarch64_get_vec_double (cpu, vn, i) \
4756 CMP 0.0 ? -1 : 0); \
4757 } \
4758 else \
4759 { \
4760 for (i = 0; i < (full ? 4 : 2); i++) \
4761 aarch64_set_vec_u32 (cpu, vd, i, \
4762 aarch64_get_vec_float (cpu, vn, i) \
4763 CMP 0.0 ? -1 : 0); \
4764 } \
4765 return; \
4766 } \
4767 while (0)
4768
4769 #define VEC_FCMP(CMP) \
4770 do \
4771 { \
4772 if (INSTR (22, 22)) \
4773 { \
4774 if (! full) \
4775 HALT_NYI; \
4776 for (i = 0; i < 2; i++) \
4777 aarch64_set_vec_u64 (cpu, vd, i, \
4778 aarch64_get_vec_double (cpu, vn, i) \
4779 CMP \
4780 aarch64_get_vec_double (cpu, vm, i) \
4781 ? -1 : 0); \
4782 } \
4783 else \
4784 { \
4785 for (i = 0; i < (full ? 4 : 2); i++) \
4786 aarch64_set_vec_u32 (cpu, vd, i, \
4787 aarch64_get_vec_float (cpu, vn, i) \
4788 CMP \
4789 aarch64_get_vec_float (cpu, vm, i) \
4790 ? -1 : 0); \
4791 } \
4792 return; \
4793 } \
4794 while (0)
4795
4796 static void
4797 do_vec_compare (sim_cpu *cpu)
4798 {
4799 /* instr[31] = 0
4800 instr[30] = half(0)/full(1)
4801 instr[29] = part-of-comparison-type
4802 instr[28,24] = 0 1110
4803 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4804                    instr[22] = type of float compares: single (0) / double (1)
4805 instr[21] = 1
4806 instr[20,16] = Vm or 00000 (compare vs 0)
4807 instr[15,10] = part-of-comparison-type
4808 instr[9,5] = Vn
4809 instr[4,0] = Vd. */
4810
4811 int full = INSTR (30, 30);
4812 int size = INSTR (23, 22);
4813 unsigned vm = INSTR (20, 16);
4814 unsigned vn = INSTR (9, 5);
4815 unsigned vd = INSTR (4, 0);
4816 unsigned i;
4817
4818 NYI_assert (28, 24, 0x0E);
4819 NYI_assert (21, 21, 1);
4820
4821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4822 if ((INSTR (11, 11)
4823 && INSTR (14, 14))
4824 || ((INSTR (11, 11) == 0
4825 && INSTR (10, 10) == 0)))
4826 {
4827 /* A compare vs 0. */
4828 if (vm != 0)
4829 {
4830 if (INSTR (15, 10) == 0x2A)
4831 do_vec_maxv (cpu);
4832 else if (INSTR (15, 10) == 0x32
4833 || INSTR (15, 10) == 0x3E)
4834 do_vec_fminmaxV (cpu);
4835 else if (INSTR (29, 23) == 0x1C
4836 && INSTR (21, 10) == 0x876)
4837 do_vec_SCVTF (cpu);
4838 else
4839 HALT_NYI;
4840 return;
4841 }
4842 }
4843
4844 if (INSTR (14, 14))
4845 {
4846 /* A floating point compare. */
4847 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4848 | INSTR (13, 10);
4849
4850 NYI_assert (15, 15, 1);
4851
4852 switch (decode)
4853 {
4854 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4855 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4856 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4857 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4858 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4859 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4860 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4861 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4862
4863 default:
4864 HALT_NYI;
4865 }
4866 }
4867 else
4868 {
4869 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4870
4871 switch (decode)
4872 {
4873 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4874 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4875 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4876 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4877 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4878 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4879 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4880 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4881 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4882 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4883 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4884 default:
4885 if (vm == 0)
4886 HALT_NYI;
4887 do_vec_maxv (cpu);
4888 }
4889 }
4890 }
4891
4892 static void
4893 do_vec_SSHL (sim_cpu *cpu)
4894 {
4895 /* instr[31] = 0
4896 instr[30] = first part (0)/ second part (1)
4897 instr[29,24] = 00 1110
4898 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4899 instr[21] = 1
4900 instr[20,16] = Vm
4901 instr[15,10] = 0100 01
4902 instr[9,5] = Vn
4903 instr[4,0] = Vd. */
4904
4905 unsigned full = INSTR (30, 30);
4906 unsigned vm = INSTR (20, 16);
4907 unsigned vn = INSTR (9, 5);
4908 unsigned vd = INSTR (4, 0);
4909 unsigned i;
4910 signed int shift;
4911
4912 NYI_assert (29, 24, 0x0E);
4913 NYI_assert (21, 21, 1);
4914 NYI_assert (15, 10, 0x11);
4915
4916   /* SSHL: a negative shift amount in an element of Vm selects a right
     (arithmetic) shift of the corresponding Vn element, as handled below.  */
4917
4918 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4919 switch (INSTR (23, 22))
4920 {
4921 case 0:
4922 for (i = 0; i < (full ? 16 : 8); i++)
4923 {
4924 shift = aarch64_get_vec_s8 (cpu, vm, i);
4925 if (shift >= 0)
4926 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4927 << shift);
4928 else
4929 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4930 >> - shift);
4931 }
4932 return;
4933
4934 case 1:
4935 for (i = 0; i < (full ? 8 : 4); i++)
4936 {
4937 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4938 if (shift >= 0)
4939 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4940 << shift);
4941 else
4942 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4943 >> - shift);
4944 }
4945 return;
4946
4947 case 2:
4948 for (i = 0; i < (full ? 4 : 2); i++)
4949 {
4950 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4951 if (shift >= 0)
4952 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4953 << shift);
4954 else
4955 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4956 >> - shift);
4957 }
4958 return;
4959
4960 case 3:
4961 if (! full)
4962 HALT_UNALLOC;
4963 for (i = 0; i < 2; i++)
4964 {
4965 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4966 if (shift >= 0)
4967 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4968 << shift);
4969 else
4970 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4971 >> - shift);
4972 }
4973 return;
4974 }
4975 }
4976
4977 static void
4978 do_vec_USHL (sim_cpu *cpu)
4979 {
4980 /* instr[31] = 0
4981 instr[30] = first part (0)/ second part (1)
4982 instr[29,24] = 10 1110
4983 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4984 instr[21] = 1
4985 instr[20,16] = Vm
4986 instr[15,10] = 0100 01
4987 instr[9,5] = Vn
4988 instr[4,0] = Vd */
4989
4990 unsigned full = INSTR (30, 30);
4991 unsigned vm = INSTR (20, 16);
4992 unsigned vn = INSTR (9, 5);
4993 unsigned vd = INSTR (4, 0);
4994 unsigned i;
4995 signed int shift;
4996
4997 NYI_assert (29, 24, 0x2E);
4998 NYI_assert (15, 10, 0x11);
4999
5000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5001 switch (INSTR (23, 22))
5002 {
5003 case 0:
5004 for (i = 0; i < (full ? 16 : 8); i++)
5005 {
5006 shift = aarch64_get_vec_s8 (cpu, vm, i);
5007 if (shift >= 0)
5008 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5009 << shift);
5010 else
5011 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5012 >> - shift);
5013 }
5014 return;
5015
5016 case 1:
5017 for (i = 0; i < (full ? 8 : 4); i++)
5018 {
5019 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5020 if (shift >= 0)
5021 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5022 << shift);
5023 else
5024 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5025 >> - shift);
5026 }
5027 return;
5028
5029 case 2:
5030 for (i = 0; i < (full ? 4 : 2); i++)
5031 {
5032 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5033 if (shift >= 0)
5034 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5035 << shift);
5036 else
5037 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5038 >> - shift);
5039 }
5040 return;
5041
5042 case 3:
5043 if (! full)
5044 HALT_UNALLOC;
5045 for (i = 0; i < 2; i++)
5046 {
5047 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5048 if (shift >= 0)
5049 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5050 << shift);
5051 else
5052 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5053 >> - shift);
5054 }
5055 return;
5056 }
5057 }
5058
5059 static void
5060 do_vec_FMLA (sim_cpu *cpu)
5061 {
5062 /* instr[31] = 0
5063 instr[30] = full/half selector
5064 instr[29,23] = 0011100
5065 instr[22] = size: 0=>float, 1=>double
5066 instr[21] = 1
5067 instr[20,16] = Vm
5068 instr[15,10] = 1100 11
5069 instr[9,5] = Vn
5070 instr[4,0] = Vd. */
5071
5072 unsigned vm = INSTR (20, 16);
5073 unsigned vn = INSTR (9, 5);
5074 unsigned vd = INSTR (4, 0);
5075 unsigned i;
5076 int full = INSTR (30, 30);
5077
5078 NYI_assert (29, 23, 0x1C);
5079 NYI_assert (21, 21, 1);
5080 NYI_assert (15, 10, 0x33);
5081
5082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5083 if (INSTR (22, 22))
5084 {
5085 if (! full)
5086 HALT_UNALLOC;
5087 for (i = 0; i < 2; i++)
5088 aarch64_set_vec_double (cpu, vd, i,
5089 aarch64_get_vec_double (cpu, vn, i) *
5090 aarch64_get_vec_double (cpu, vm, i) +
5091 aarch64_get_vec_double (cpu, vd, i));
5092 }
5093 else
5094 {
5095 for (i = 0; i < (full ? 4 : 2); i++)
5096 aarch64_set_vec_float (cpu, vd, i,
5097 aarch64_get_vec_float (cpu, vn, i) *
5098 aarch64_get_vec_float (cpu, vm, i) +
5099 aarch64_get_vec_float (cpu, vd, i));
5100 }
5101 }
5102
5103 static void
5104 do_vec_max (sim_cpu *cpu)
5105 {
5106 /* instr[31] = 0
5107 instr[30] = full/half selector
5108 instr[29] = SMAX (0) / UMAX (1)
5109 instr[28,24] = 0 1110
5110 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5111 instr[21] = 1
5112 instr[20,16] = Vm
5113 instr[15,10] = 0110 01
5114 instr[9,5] = Vn
5115 instr[4,0] = Vd. */
5116
5117 unsigned vm = INSTR (20, 16);
5118 unsigned vn = INSTR (9, 5);
5119 unsigned vd = INSTR (4, 0);
5120 unsigned i;
5121 int full = INSTR (30, 30);
5122
5123 NYI_assert (28, 24, 0x0E);
5124 NYI_assert (21, 21, 1);
5125 NYI_assert (15, 10, 0x19);
5126
5127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5128 if (INSTR (29, 29))
5129 {
5130 switch (INSTR (23, 22))
5131 {
5132 case 0:
5133 for (i = 0; i < (full ? 16 : 8); i++)
5134 aarch64_set_vec_u8 (cpu, vd, i,
5135 aarch64_get_vec_u8 (cpu, vn, i)
5136 > aarch64_get_vec_u8 (cpu, vm, i)
5137 ? aarch64_get_vec_u8 (cpu, vn, i)
5138 : aarch64_get_vec_u8 (cpu, vm, i));
5139 return;
5140
5141 case 1:
5142 for (i = 0; i < (full ? 8 : 4); i++)
5143 aarch64_set_vec_u16 (cpu, vd, i,
5144 aarch64_get_vec_u16 (cpu, vn, i)
5145 > aarch64_get_vec_u16 (cpu, vm, i)
5146 ? aarch64_get_vec_u16 (cpu, vn, i)
5147 : aarch64_get_vec_u16 (cpu, vm, i));
5148 return;
5149
5150 case 2:
5151 for (i = 0; i < (full ? 4 : 2); i++)
5152 aarch64_set_vec_u32 (cpu, vd, i,
5153 aarch64_get_vec_u32 (cpu, vn, i)
5154 > aarch64_get_vec_u32 (cpu, vm, i)
5155 ? aarch64_get_vec_u32 (cpu, vn, i)
5156 : aarch64_get_vec_u32 (cpu, vm, i));
5157 return;
5158
5159 case 3:
5160 HALT_UNALLOC;
5161 }
5162 }
5163 else
5164 {
5165 switch (INSTR (23, 22))
5166 {
5167 case 0:
5168 for (i = 0; i < (full ? 16 : 8); i++)
5169 aarch64_set_vec_s8 (cpu, vd, i,
5170 aarch64_get_vec_s8 (cpu, vn, i)
5171 > aarch64_get_vec_s8 (cpu, vm, i)
5172 ? aarch64_get_vec_s8 (cpu, vn, i)
5173 : aarch64_get_vec_s8 (cpu, vm, i));
5174 return;
5175
5176 case 1:
5177 for (i = 0; i < (full ? 8 : 4); i++)
5178 aarch64_set_vec_s16 (cpu, vd, i,
5179 aarch64_get_vec_s16 (cpu, vn, i)
5180 > aarch64_get_vec_s16 (cpu, vm, i)
5181 ? aarch64_get_vec_s16 (cpu, vn, i)
5182 : aarch64_get_vec_s16 (cpu, vm, i));
5183 return;
5184
5185 case 2:
5186 for (i = 0; i < (full ? 4 : 2); i++)
5187 aarch64_set_vec_s32 (cpu, vd, i,
5188 aarch64_get_vec_s32 (cpu, vn, i)
5189 > aarch64_get_vec_s32 (cpu, vm, i)
5190 ? aarch64_get_vec_s32 (cpu, vn, i)
5191 : aarch64_get_vec_s32 (cpu, vm, i));
5192 return;
5193
5194 case 3:
5195 HALT_UNALLOC;
5196 }
5197 }
5198 }
5199
5200 static void
5201 do_vec_min (sim_cpu *cpu)
5202 {
5203 /* instr[31] = 0
5204 instr[30] = full/half selector
5205 instr[29] = SMIN (0) / UMIN (1)
5206 instr[28,24] = 0 1110
5207 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5208 instr[21] = 1
5209 instr[20,16] = Vm
5210 instr[15,10] = 0110 11
5211 instr[9,5] = Vn
5212 instr[4,0] = Vd. */
5213
5214 unsigned vm = INSTR (20, 16);
5215 unsigned vn = INSTR (9, 5);
5216 unsigned vd = INSTR (4, 0);
5217 unsigned i;
5218 int full = INSTR (30, 30);
5219
5220 NYI_assert (28, 24, 0x0E);
5221 NYI_assert (21, 21, 1);
5222 NYI_assert (15, 10, 0x1B);
5223
5224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5225 if (INSTR (29, 29))
5226 {
5227 switch (INSTR (23, 22))
5228 {
5229 case 0:
5230 for (i = 0; i < (full ? 16 : 8); i++)
5231 aarch64_set_vec_u8 (cpu, vd, i,
5232 aarch64_get_vec_u8 (cpu, vn, i)
5233 < aarch64_get_vec_u8 (cpu, vm, i)
5234 ? aarch64_get_vec_u8 (cpu, vn, i)
5235 : aarch64_get_vec_u8 (cpu, vm, i));
5236 return;
5237
5238 case 1:
5239 for (i = 0; i < (full ? 8 : 4); i++)
5240 aarch64_set_vec_u16 (cpu, vd, i,
5241 aarch64_get_vec_u16 (cpu, vn, i)
5242 < aarch64_get_vec_u16 (cpu, vm, i)
5243 ? aarch64_get_vec_u16 (cpu, vn, i)
5244 : aarch64_get_vec_u16 (cpu, vm, i));
5245 return;
5246
5247 case 2:
5248 for (i = 0; i < (full ? 4 : 2); i++)
5249 aarch64_set_vec_u32 (cpu, vd, i,
5250 aarch64_get_vec_u32 (cpu, vn, i)
5251 < aarch64_get_vec_u32 (cpu, vm, i)
5252 ? aarch64_get_vec_u32 (cpu, vn, i)
5253 : aarch64_get_vec_u32 (cpu, vm, i));
5254 return;
5255
5256 case 3:
5257 HALT_UNALLOC;
5258 }
5259 }
5260 else
5261 {
5262 switch (INSTR (23, 22))
5263 {
5264 case 0:
5265 for (i = 0; i < (full ? 16 : 8); i++)
5266 aarch64_set_vec_s8 (cpu, vd, i,
5267 aarch64_get_vec_s8 (cpu, vn, i)
5268 < aarch64_get_vec_s8 (cpu, vm, i)
5269 ? aarch64_get_vec_s8 (cpu, vn, i)
5270 : aarch64_get_vec_s8 (cpu, vm, i));
5271 return;
5272
5273 case 1:
5274 for (i = 0; i < (full ? 8 : 4); i++)
5275 aarch64_set_vec_s16 (cpu, vd, i,
5276 aarch64_get_vec_s16 (cpu, vn, i)
5277 < aarch64_get_vec_s16 (cpu, vm, i)
5278 ? aarch64_get_vec_s16 (cpu, vn, i)
5279 : aarch64_get_vec_s16 (cpu, vm, i));
5280 return;
5281
5282 case 2:
5283 for (i = 0; i < (full ? 4 : 2); i++)
5284 aarch64_set_vec_s32 (cpu, vd, i,
5285 aarch64_get_vec_s32 (cpu, vn, i)
5286 < aarch64_get_vec_s32 (cpu, vm, i)
5287 ? aarch64_get_vec_s32 (cpu, vn, i)
5288 : aarch64_get_vec_s32 (cpu, vm, i));
5289 return;
5290
5291 case 3:
5292 HALT_UNALLOC;
5293 }
5294 }
5295 }
5296
5297 static void
5298 do_vec_sub_long (sim_cpu *cpu)
5299 {
5300 /* instr[31] = 0
5301 instr[30] = lower (0) / upper (1)
5302 instr[29] = signed (0) / unsigned (1)
5303 instr[28,24] = 0 1110
5304 instr[23,22] = size: bytes (00), half (01), word (10)
5305 instr[21] = 1
5306     instr[20,16] = Vm
5307 instr[15,10] = 0010 00
5308 instr[9,5] = Vn
5309 instr[4,0] = V dest. */
5310
5311 unsigned size = INSTR (23, 22);
5312 unsigned vm = INSTR (20, 16);
5313 unsigned vn = INSTR (9, 5);
5314 unsigned vd = INSTR (4, 0);
5315 unsigned bias = 0;
5316 unsigned i;
5317
5318 NYI_assert (28, 24, 0x0E);
5319 NYI_assert (21, 21, 1);
5320 NYI_assert (15, 10, 0x08);
5321
5322 if (size == 3)
5323 HALT_UNALLOC;
5324
5325 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5326 switch (INSTR (30, 29))
5327 {
5328 case 2: /* SSUBL2. */
5329       bias = 2;	/* Fall through.  */
5330 case 0: /* SSUBL. */
5331 switch (size)
5332 {
5333 case 0:
5334 	  bias *= 4;	/* For the '2' variant the upper half starts at byte 8.  */
5335 for (i = 0; i < 8; i++)
5336 aarch64_set_vec_s16 (cpu, vd, i,
5337 aarch64_get_vec_s8 (cpu, vn, i + bias)
5338 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5339 break;
5340
5341 case 1:
5342 bias *= 2;
5343 for (i = 0; i < 4; i++)
5344 aarch64_set_vec_s32 (cpu, vd, i,
5345 aarch64_get_vec_s16 (cpu, vn, i + bias)
5346 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5347 break;
5348
5349 case 2:
5350 for (i = 0; i < 2; i++)
5351 aarch64_set_vec_s64 (cpu, vd, i,
5352 aarch64_get_vec_s32 (cpu, vn, i + bias)
5353 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5354 break;
5355
5356 default:
5357 HALT_UNALLOC;
5358 }
5359 break;
5360
5361 case 3: /* USUBL2. */
5362       bias = 2;	/* Fall through.  */
5363 case 1: /* USUBL. */
5364 switch (size)
5365 {
5366 case 0:
5367 	  bias *= 4;	/* For the '2' variant the upper half starts at byte 8.  */
5368 for (i = 0; i < 8; i++)
5369 aarch64_set_vec_u16 (cpu, vd, i,
5370 aarch64_get_vec_u8 (cpu, vn, i + bias)
5371 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5372 break;
5373
5374 case 1:
5375 bias *= 2;
5376 for (i = 0; i < 4; i++)
5377 aarch64_set_vec_u32 (cpu, vd, i,
5378 aarch64_get_vec_u16 (cpu, vn, i + bias)
5379 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5380 break;
5381
5382 case 2:
5383 for (i = 0; i < 2; i++)
5384 aarch64_set_vec_u64 (cpu, vd, i,
5385 aarch64_get_vec_u32 (cpu, vn, i + bias)
5386 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5387 break;
5388
5389 default:
5390 HALT_UNALLOC;
5391 }
5392 break;
5393 }
5394 }
5395
5396 static void
5397 do_vec_ADDP (sim_cpu *cpu)
5398 {
5399 /* instr[31] = 0
5400 instr[30] = half(0)/full(1)
5401 instr[29,24] = 00 1110
5402 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5403 instr[21] = 1
5404     instr[20,16] = Vm
5405 instr[15,10] = 1011 11
5406 instr[9,5] = Vn
5407 instr[4,0] = V dest. */
5408
5409 FRegister copy_vn;
5410 FRegister copy_vm;
5411 unsigned full = INSTR (30, 30);
5412 unsigned size = INSTR (23, 22);
5413 unsigned vm = INSTR (20, 16);
5414 unsigned vn = INSTR (9, 5);
5415 unsigned vd = INSTR (4, 0);
5416 unsigned i, range;
5417
5418 NYI_assert (29, 24, 0x0E);
5419 NYI_assert (21, 21, 1);
5420 NYI_assert (15, 10, 0x2F);
5421
5422 /* Make copies of the source registers in case vd == vn/vm. */
5423 copy_vn = cpu->fr[vn];
5424 copy_vm = cpu->fr[vm];
5425
5426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5427 switch (size)
5428 {
5429 case 0:
5430 range = full ? 8 : 4;
5431 for (i = 0; i < range; i++)
5432 {
5433 aarch64_set_vec_u8 (cpu, vd, i,
5434 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5435 aarch64_set_vec_u8 (cpu, vd, i + range,
5436 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5437 }
5438 return;
5439
5440 case 1:
5441 range = full ? 4 : 2;
5442 for (i = 0; i < range; i++)
5443 {
5444 aarch64_set_vec_u16 (cpu, vd, i,
5445 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5446 aarch64_set_vec_u16 (cpu, vd, i + range,
5447 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5448 }
5449 return;
5450
5451 case 2:
5452 range = full ? 2 : 1;
5453 for (i = 0; i < range; i++)
5454 {
5455 aarch64_set_vec_u32 (cpu, vd, i,
5456 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5457 aarch64_set_vec_u32 (cpu, vd, i + range,
5458 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5459 }
5460 return;
5461
5462 case 3:
5463 if (! full)
5464 HALT_UNALLOC;
5465 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5466 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5467 return;
5468 }
5469 }
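
/* For example, with the 4S arrangement the result is
   vd = { vn[0]+vn[1], vn[2]+vn[3], vm[0]+vm[1], vm[2]+vm[3] };
   the source copies above keep this correct when Vd aliases Vn or Vm.  */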
5470
5471 static void
5472 do_vec_FABS (sim_cpu *cpu)
5473 {
5474 /* instr[31] = 0
5475 instr[30] = half(0)/full(1)
5476 instr[29,23] = 00 1110 1
5477 instr[22] = float(0)/double(1)
5478 instr[21,16] = 10 0000
5479 instr[15,10] = 1111 10
5480 instr[9,5] = Vn
5481 instr[4,0] = Vd. */
5482
5483 unsigned vn = INSTR (9, 5);
5484 unsigned vd = INSTR (4, 0);
5485 unsigned full = INSTR (30, 30);
5486 unsigned i;
5487
5488 NYI_assert (29, 23, 0x1D);
5489 NYI_assert (21, 10, 0x83E);
5490
5491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5492 if (INSTR (22, 22))
5493 {
5494 if (! full)
5495 HALT_NYI;
5496
5497 for (i = 0; i < 2; i++)
5498 aarch64_set_vec_double (cpu, vd, i,
5499 fabs (aarch64_get_vec_double (cpu, vn, i)));
5500 }
5501 else
5502 {
5503 for (i = 0; i < (full ? 4 : 2); i++)
5504 aarch64_set_vec_float (cpu, vd, i,
5505 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5506 }
5507 }
5508
5509 static void
5510 do_vec_FCVTZS (sim_cpu *cpu)
5511 {
5512 /* instr[31] = 0
5513 instr[30] = half (0) / all (1)
5514 instr[29,23] = 00 1110 1
5515 instr[22] = single (0) / double (1)
5516 instr[21,10] = 10 0001 1011 10
5517 instr[9,5] = Rn
5518 instr[4,0] = Rd. */
5519
5520 unsigned rn = INSTR (9, 5);
5521 unsigned rd = INSTR (4, 0);
5522 unsigned full = INSTR (30, 30);
5523 unsigned i;
5524
5525 NYI_assert (31, 31, 0);
5526 NYI_assert (29, 23, 0x1D);
5527 NYI_assert (21, 10, 0x86E);
5528
5529 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5530 if (INSTR (22, 22))
5531 {
5532 if (! full)
5533 HALT_UNALLOC;
5534
5535 for (i = 0; i < 2; i++)
5536 aarch64_set_vec_s64 (cpu, rd, i,
5537 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5538 }
5539 else
5540 for (i = 0; i < (full ? 4 : 2); i++)
5541 aarch64_set_vec_s32 (cpu, rd, i,
5542 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5543 }
5544
5545 static void
5546 do_vec_REV64 (sim_cpu *cpu)
5547 {
5548 /* instr[31] = 0
5549 instr[30] = full/half
5550 instr[29,24] = 00 1110
5551 instr[23,22] = size
5552 instr[21,10] = 10 0000 0000 10
5553 instr[9,5] = Rn
5554 instr[4,0] = Rd. */
5555
5556 unsigned rn = INSTR (9, 5);
5557 unsigned rd = INSTR (4, 0);
5558 unsigned size = INSTR (23, 22);
5559 unsigned full = INSTR (30, 30);
5560 unsigned i;
5561 FRegister val;
5562
5563 NYI_assert (29, 24, 0x0E);
5564 NYI_assert (21, 10, 0x802);
5565
5566 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5567 switch (size)
5568 {
5569 case 0:
5570 for (i = 0; i < (full ? 16 : 8); i++)
5571 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5572 break;
5573
5574 case 1:
5575 for (i = 0; i < (full ? 8 : 4); i++)
5576 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5577 break;
5578
5579 case 2:
5580 for (i = 0; i < (full ? 4 : 2); i++)
5581 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5582 break;
5583
5584 case 3:
5585 HALT_UNALLOC;
5586 }
5587
5588 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5589 if (full)
5590 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5591 }
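
/* Reversing the elements within each 64-bit doubleword is just an index
   XOR: bytes use i ^ 7, halves i ^ 3 and words i ^ 1, as above.  */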
5592
5593 static void
5594 do_vec_REV16 (sim_cpu *cpu)
5595 {
5596 /* instr[31] = 0
5597 instr[30] = full/half
5598 instr[29,24] = 00 1110
5599 instr[23,22] = size
5600 instr[21,10] = 10 0000 0001 10
5601 instr[9,5] = Rn
5602 instr[4,0] = Rd. */
5603
5604 unsigned rn = INSTR (9, 5);
5605 unsigned rd = INSTR (4, 0);
5606 unsigned size = INSTR (23, 22);
5607 unsigned full = INSTR (30, 30);
5608 unsigned i;
5609 FRegister val;
5610
5611 NYI_assert (29, 24, 0x0E);
5612 NYI_assert (21, 10, 0x806);
5613
5614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5615 switch (size)
5616 {
5617 case 0:
5618 for (i = 0; i < (full ? 16 : 8); i++)
5619 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5620 break;
5621
5622 default:
5623 HALT_UNALLOC;
5624 }
5625
5626 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5627 if (full)
5628 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5629 }
5630
5631 static void
5632 do_vec_op1 (sim_cpu *cpu)
5633 {
5634 /* instr[31] = 0
5635 instr[30] = half/full
5636 instr[29,24] = 00 1110
5637 instr[23,21] = ???
5638 instr[20,16] = Vm
5639 instr[15,10] = sub-opcode
5640 instr[9,5] = Vn
5641 instr[4,0] = Vd */
5642 NYI_assert (29, 24, 0x0E);
5643
5644 if (INSTR (21, 21) == 0)
5645 {
5646 if (INSTR (23, 22) == 0)
5647 {
5648 if (INSTR (30, 30) == 1
5649 && INSTR (17, 14) == 0
5650 && INSTR (12, 10) == 7)
5651 return do_vec_ins_2 (cpu);
5652
5653 switch (INSTR (15, 10))
5654 {
5655 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5656 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5657 case 0x07: do_vec_INS (cpu); return;
5658 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5659 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5660
5661 case 0x00:
5662 case 0x08:
5663 case 0x10:
5664 case 0x18:
5665 do_vec_TBL (cpu); return;
5666
5667 case 0x06:
5668 case 0x16:
5669 do_vec_UZP (cpu); return;
5670
5671 case 0x0A: do_vec_TRN (cpu); return;
5672
5673 case 0x0E:
5674 case 0x1E:
5675 do_vec_ZIP (cpu); return;
5676
5677 default:
5678 HALT_NYI;
5679 }
5680 }
5681
5682 switch (INSTR (13, 10))
5683 {
5684 case 0x6: do_vec_UZP (cpu); return;
5685 case 0xE: do_vec_ZIP (cpu); return;
5686 case 0xA: do_vec_TRN (cpu); return;
5687 default: HALT_NYI;
5688 }
5689 }
5690
5691 switch (INSTR (15, 10))
5692 {
5693 case 0x02: do_vec_REV64 (cpu); return;
5694 case 0x06: do_vec_REV16 (cpu); return;
5695
5696 case 0x07:
5697 switch (INSTR (23, 21))
5698 {
5699 case 1: do_vec_AND (cpu); return;
5700 case 3: do_vec_BIC (cpu); return;
5701 case 5: do_vec_ORR (cpu); return;
5702 case 7: do_vec_ORN (cpu); return;
5703 default: HALT_NYI;
5704 }
5705
5706 case 0x08: do_vec_sub_long (cpu); return;
5707 case 0x0a: do_vec_XTN (cpu); return;
5708 case 0x11: do_vec_SSHL (cpu); return;
5709 case 0x16: do_vec_CNT (cpu); return;
5710 case 0x19: do_vec_max (cpu); return;
5711 case 0x1B: do_vec_min (cpu); return;
5712 case 0x21: do_vec_add (cpu); return;
5713 case 0x25: do_vec_MLA (cpu); return;
5714 case 0x27: do_vec_mul (cpu); return;
5715 case 0x2F: do_vec_ADDP (cpu); return;
5716 case 0x30: do_vec_mull (cpu); return;
5717 case 0x33: do_vec_FMLA (cpu); return;
5718 case 0x35: do_vec_fadd (cpu); return;
5719
5720 case 0x2E:
5721 switch (INSTR (20, 16))
5722 {
5723 case 0x00: do_vec_ABS (cpu); return;
5724 case 0x01: do_vec_FCVTZS (cpu); return;
5725 case 0x11: do_vec_ADDV (cpu); return;
5726 default: HALT_NYI;
5727 }
5728
5729 case 0x31:
5730 case 0x3B:
5731 do_vec_Fminmax (cpu); return;
5732
5733 case 0x0D:
5734 case 0x0F:
5735 case 0x22:
5736 case 0x23:
5737 case 0x26:
5738 case 0x2A:
5739 case 0x32:
5740 case 0x36:
5741 case 0x39:
5742 case 0x3A:
5743 do_vec_compare (cpu); return;
5744
5745 case 0x3E:
5746 do_vec_FABS (cpu); return;
5747
5748 default:
5749 HALT_NYI;
5750 }
5751 }
5752
5753 static void
5754 do_vec_xtl (sim_cpu *cpu)
5755 {
5756 /* instr[31] = 0
5757 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5758 instr[28,22] = 0 1111 00
5759 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5760 instr[15,10] = 1010 01
5761 instr[9,5] = V source
5762 instr[4,0] = V dest. */
5763
5764 unsigned vs = INSTR (9, 5);
5765 unsigned vd = INSTR (4, 0);
5766 unsigned i, shift, bias = 0;
5767
5768 NYI_assert (28, 22, 0x3C);
5769 NYI_assert (15, 10, 0x29);
5770
5771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5772 switch (INSTR (30, 29))
5773 {
5774 case 2: /* SXTL2, SSHLL2. */
5775       bias = 2;	/* Fall through.  */
5776 case 0: /* SXTL, SSHLL. */
5777 if (INSTR (21, 21))
5778 {
5779 int64_t val1, val2;
5780
5781 shift = INSTR (20, 16);
5782 /* Get the source values before setting the destination values
5783 in case the source and destination are the same. */
5784 	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5785 	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5786 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5787 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5788 }
5789 else if (INSTR (20, 20))
5790 {
5791 int32_t v[4];
5793
5794 shift = INSTR (19, 16);
5795 bias *= 2;
5796 for (i = 0; i < 4; i++)
5797 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5798 for (i = 0; i < 4; i++)
5799 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5800 }
5801 else
5802 {
5803 int16_t v[8];
5804 NYI_assert (19, 19, 1);
5805
5806 shift = INSTR (18, 16);
5807 bias *= 4;
5808 for (i = 0; i < 8; i++)
5809 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5810 for (i = 0; i < 8; i++)
5811 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5812 }
5813 return;
5814
5815 case 3: /* UXTL2, USHLL2. */
5816       bias = 2;	/* Fall through.  */
5817 case 1: /* UXTL, USHLL. */
5818 if (INSTR (21, 21))
5819 {
5820 uint64_t v1, v2;
5821 shift = INSTR (20, 16);
5822 	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5823 	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5824 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5825 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5826 }
5827 else if (INSTR (20, 20))
5828 {
5829 uint32_t v[4];
5830 shift = INSTR (19, 16);
5831 bias *= 2;
5832 for (i = 0; i < 4; i++)
5833 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5834 for (i = 0; i < 4; i++)
5835 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5836 }
5837 else
5838 {
5839 uint16_t v[8];
5840 NYI_assert (19, 19, 1);
5841
5842 shift = INSTR (18, 16);
5843 bias *= 4;
5844 for (i = 0; i < 8; i++)
5845 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5846 for (i = 0; i < 8; i++)
5847 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5848 }
5849 return;
5850 }
5851 }
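
/* SXTL/UXTL are simply SSHLL/USHLL with a shift of zero: the position
   of the leading 1 in instr[21,16] selects the source lane size, and
   the bits below it give the left-shift amount applied while widening.  */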
5852
5853 static void
5854 do_vec_SHL (sim_cpu *cpu)
5855 {
5856 /* instr [31] = 0
5857 instr [30] = half(0)/full(1)
5858 instr [29,23] = 001 1110
5859 instr [22,16] = size and shift amount
5860 instr [15,10] = 01 0101
5861 instr [9, 5] = Vs
5862 instr [4, 0] = Vd. */
5863
5864 int shift;
5865 int full = INSTR (30, 30);
5866 unsigned vs = INSTR (9, 5);
5867 unsigned vd = INSTR (4, 0);
5868 unsigned i;
5869
5870 NYI_assert (29, 23, 0x1E);
5871 NYI_assert (15, 10, 0x15);
5872
5873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5874 if (INSTR (22, 22))
5875 {
5876 shift = INSTR (21, 16);
5877
5878 if (full == 0)
5879 HALT_UNALLOC;
5880
5881 for (i = 0; i < 2; i++)
5882 {
5883 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5884 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5885 }
5886
5887 return;
5888 }
5889
5890 if (INSTR (21, 21))
5891 {
5892 shift = INSTR (20, 16);
5893
5894 for (i = 0; i < (full ? 4 : 2); i++)
5895 {
5896 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5897 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5898 }
5899
5900 return;
5901 }
5902
5903 if (INSTR (20, 20))
5904 {
5905 shift = INSTR (19, 16);
5906
5907 for (i = 0; i < (full ? 8 : 4); i++)
5908 {
5909 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5910 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5911 }
5912
5913 return;
5914 }
5915
5916 if (INSTR (19, 19) == 0)
5917 HALT_UNALLOC;
5918
5919 shift = INSTR (18, 16);
5920
5921 for (i = 0; i < (full ? 16 : 8); i++)
5922 {
5923 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5924 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5925 }
5926 }
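
/* For these left shifts the architecture encodes the amount as
   immh:immb = esize + shift, so the amount is just the bits below the
   leading 1.  For example instr[22,16] = 0100011 (35) selects 32-bit
   lanes (bit 21 set) and a shift of 3 (35 - 32).  */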
5927
5928 static void
5929 do_vec_SSHR_USHR (sim_cpu *cpu)
5930 {
5931 /* instr [31] = 0
5932 instr [30] = half(0)/full(1)
5933 instr [29] = signed(0)/unsigned(1)
5934 instr [28,23] = 0 1111 0
5935 instr [22,16] = size and shift amount
5936 instr [15,10] = 0000 01
5937 instr [9, 5] = Vs
5938 instr [4, 0] = Vd. */
5939
5940 int full = INSTR (30, 30);
5941 int sign = ! INSTR (29, 29);
5942 unsigned shift = INSTR (22, 16);
5943 unsigned vs = INSTR (9, 5);
5944 unsigned vd = INSTR (4, 0);
5945 unsigned i;
5946
5947 NYI_assert (28, 23, 0x1E);
5948 NYI_assert (15, 10, 0x01);
5949
5950 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5951 if (INSTR (22, 22))
5952 {
5953 shift = 128 - shift;
5954
5955 if (full == 0)
5956 HALT_UNALLOC;
5957
5958 if (sign)
5959 for (i = 0; i < 2; i++)
5960 {
5961 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5962 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5963 }
5964 else
5965 for (i = 0; i < 2; i++)
5966 {
5967 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5968 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5969 }
5970
5971 return;
5972 }
5973
5974 if (INSTR (21, 21))
5975 {
5976 shift = 64 - shift;
5977
5978 if (sign)
5979 for (i = 0; i < (full ? 4 : 2); i++)
5980 {
5981 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5982 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5983 }
5984 else
5985 for (i = 0; i < (full ? 4 : 2); i++)
5986 {
5987 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5988 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5989 }
5990
5991 return;
5992 }
5993
5994 if (INSTR (20, 20))
5995 {
5996 shift = 32 - shift;
5997
5998 if (sign)
5999 for (i = 0; i < (full ? 8 : 4); i++)
6000 {
6001 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6002 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6003 }
6004 else
6005 for (i = 0; i < (full ? 8 : 4); i++)
6006 {
6007 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6008 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6009 }
6010
6011 return;
6012 }
6013
6014 if (INSTR (19, 19) == 0)
6015 HALT_UNALLOC;
6016
6017 shift = 16 - shift;
6018
6019 if (sign)
6020 for (i = 0; i < (full ? 16 : 8); i++)
6021 {
6022 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6023 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6024 }
6025 else
6026 for (i = 0; i < (full ? 16 : 8); i++)
6027 {
6028 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6029 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6030 }
6031 }
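
/* Right shifts are encoded as immh:immb = (2 * esize) - shift, hence
   the 128/64/32/16 adjustments above.  For example instr[22,16] =
   1001000 (72) selects 64-bit lanes and a shift of 128 - 72 = 56.  */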
6032
6033 static void
6034 do_vec_MUL_by_element (sim_cpu *cpu)
6035 {
6036 /* instr[31] = 0
6037 instr[30] = half/full
6038 instr[29,24] = 00 1111
6039 instr[23,22] = size
6040 instr[21] = L
6041 instr[20] = M
6042 instr[19,16] = m
6043 instr[15,12] = 1000
6044 instr[11] = H
6045 instr[10] = 0
6046 instr[9,5] = Vn
6047 instr[4,0] = Vd */
6048
6049 unsigned full = INSTR (30, 30);
6050 unsigned L = INSTR (21, 21);
6051 unsigned H = INSTR (11, 11);
6052 unsigned vn = INSTR (9, 5);
6053 unsigned vd = INSTR (4, 0);
6054 unsigned size = INSTR (23, 22);
6055 unsigned index;
6056 unsigned vm;
6057 unsigned e;
6058
6059 NYI_assert (29, 24, 0x0F);
6060 NYI_assert (15, 12, 0x8);
6061 NYI_assert (10, 10, 0);
6062
6063 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6064 switch (size)
6065 {
6066 case 1:
6067 {
6068 /* 16 bit products. */
6069 uint16_t product;
6070 uint16_t element1;
6071 uint16_t element2;
6072
6073 index = (H << 2) | (L << 1) | INSTR (20, 20);
6074 vm = INSTR (19, 16);
6075 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6076
6077 for (e = 0; e < (full ? 8 : 4); e ++)
6078 {
6079 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6080 product = element1 * element2;
6081 aarch64_set_vec_u16 (cpu, vd, e, product);
6082 }
6083 }
6084 break;
6085
6086 case 2:
6087 {
6088 /* 32 bit products. */
6089 uint32_t product;
6090 uint32_t element1;
6091 uint32_t element2;
6092
6093 index = (H << 1) | L;
6094 vm = INSTR (20, 16);
6095 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6096
6097 for (e = 0; e < (full ? 4 : 2); e ++)
6098 {
6099 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6100 product = element1 * element2;
6101 aarch64_set_vec_u32 (cpu, vd, e, product);
6102 }
6103 }
6104 break;
6105
6106 default:
6107 HALT_UNALLOC;
6108 }
6109 }
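
/* The element index is assembled differently per size: 16-bit lanes use
   H:L:M with a 4-bit register field (so only V0-V15 can be indexed),
   while 32-bit lanes use H:L with the full 5-bit Vm field.  */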
6110
6111 static void
6112 do_FMLA_by_element (sim_cpu *cpu)
6113 {
6114 /* instr[31] = 0
6115 instr[30] = half/full
6116 instr[29,23] = 00 1111 1
6117 instr[22] = size
6118 instr[21] = L
6119 instr[20,16] = m
6120 instr[15,12] = 0001
6121 instr[11] = H
6122 instr[10] = 0
6123 instr[9,5] = Vn
6124 instr[4,0] = Vd */
6125
6126 unsigned full = INSTR (30, 30);
6127 unsigned size = INSTR (22, 22);
6128 unsigned L = INSTR (21, 21);
6129 unsigned vm = INSTR (20, 16);
6130 unsigned H = INSTR (11, 11);
6131 unsigned vn = INSTR (9, 5);
6132 unsigned vd = INSTR (4, 0);
6133 unsigned e;
6134
6135 NYI_assert (29, 23, 0x1F);
6136 NYI_assert (15, 12, 0x1);
6137 NYI_assert (10, 10, 0);
6138
6139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6140 if (size)
6141 {
6142 double element1, element2;
6143
6144 if (! full || L)
6145 HALT_UNALLOC;
6146
6147 element2 = aarch64_get_vec_double (cpu, vm, H);
6148
6149 for (e = 0; e < 2; e++)
6150 {
6151 element1 = aarch64_get_vec_double (cpu, vn, e);
6152 element1 *= element2;
6153 element1 += aarch64_get_vec_double (cpu, vd, e);
6154 aarch64_set_vec_double (cpu, vd, e, element1);
6155 }
6156 }
6157 else
6158 {
6159 float element1;
6160 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6161
6162 for (e = 0; e < (full ? 4 : 2); e++)
6163 {
6164 element1 = aarch64_get_vec_float (cpu, vn, e);
6165 element1 *= element2;
6166 element1 += aarch64_get_vec_float (cpu, vd, e);
6167 aarch64_set_vec_float (cpu, vd, e, element1);
6168 }
6169 }
6170 }
6171
6172 static void
6173 do_vec_op2 (sim_cpu *cpu)
6174 {
6175 /* instr[31] = 0
6176 instr[30] = half/full
6177 instr[29,24] = 00 1111
6178 instr[23] = ?
6179 instr[22,16] = element size & index
6180 instr[15,10] = sub-opcode
6181 instr[9,5] = Vm
6182 instr[4,0] = Vd */
6183
6184 NYI_assert (29, 24, 0x0F);
6185
6186 if (INSTR (23, 23) != 0)
6187 {
6188 switch (INSTR (15, 10))
6189 {
6190 case 0x04:
6191 case 0x06:
6192 do_FMLA_by_element (cpu);
6193 return;
6194
6195 case 0x20:
6196 case 0x22:
6197 do_vec_MUL_by_element (cpu);
6198 return;
6199
6200 default:
6201 HALT_NYI;
6202 }
6203 }
6204 else
6205 {
6206 switch (INSTR (15, 10))
6207 {
6208 case 0x01: do_vec_SSHR_USHR (cpu); return;
6209 case 0x15: do_vec_SHL (cpu); return;
6210 case 0x20:
6211 case 0x22: do_vec_MUL_by_element (cpu); return;
6212 case 0x29: do_vec_xtl (cpu); return;
6213 default: HALT_NYI;
6214 }
6215 }
6216 }
6217
6218 static void
6219 do_vec_neg (sim_cpu *cpu)
6220 {
6221 /* instr[31] = 0
6222 instr[30] = full(1)/half(0)
6223 instr[29,24] = 10 1110
6224 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6225 instr[21,10] = 1000 0010 1110
6226 instr[9,5] = Vs
6227 instr[4,0] = Vd */
6228
6229 int full = INSTR (30, 30);
6230 unsigned vs = INSTR (9, 5);
6231 unsigned vd = INSTR (4, 0);
6232 unsigned i;
6233
6234 NYI_assert (29, 24, 0x2E);
6235 NYI_assert (21, 10, 0x82E);
6236
6237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6238 switch (INSTR (23, 22))
6239 {
6240 case 0:
6241 for (i = 0; i < (full ? 16 : 8); i++)
6242 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6243 return;
6244
6245 case 1:
6246 for (i = 0; i < (full ? 8 : 4); i++)
6247 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6248 return;
6249
6250 case 2:
6251 for (i = 0; i < (full ? 4 : 2); i++)
6252 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6253 return;
6254
6255 case 3:
6256 if (! full)
6257 HALT_NYI;
6258 for (i = 0; i < 2; i++)
6259 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6260 return;
6261 }
6262 }
6263
6264 static void
6265 do_vec_sqrt (sim_cpu *cpu)
6266 {
6267 /* instr[31] = 0
6268 instr[30] = full(1)/half(0)
6269 instr[29,23] = 101 1101
6270 instr[22] = single(0)/double(1)
6271 instr[21,10] = 1000 0111 1110
6272 instr[9,5] = Vs
6273 instr[4,0] = Vd. */
6274
6275 int full = INSTR (30, 30);
6276 unsigned vs = INSTR (9, 5);
6277 unsigned vd = INSTR (4, 0);
6278 unsigned i;
6279
  NYI_assert (29, 23, 0x5D);
6281 NYI_assert (21, 10, 0x87E);
6282
6283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6284 if (INSTR (22, 22) == 0)
6285 for (i = 0; i < (full ? 4 : 2); i++)
6286 aarch64_set_vec_float (cpu, vd, i,
6287 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6288 else
6289 for (i = 0; i < 2; i++)
6290 aarch64_set_vec_double (cpu, vd, i,
6291 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6292 }
6293
6294 static void
6295 do_vec_mls_indexed (sim_cpu *cpu)
6296 {
6297 /* instr[31] = 0
6298 instr[30] = half(0)/full(1)
6299 instr[29,24] = 10 1111
6300 instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11],instr[21,20] = index (if 16-bit)
     instr[11],instr[21] = index (if 32-bit)
6303 instr[20,16] = Vm
6304 instr[15,12] = 0100
6305 instr[11] = part of index
6306 instr[10] = 0
6307 instr[9,5] = Vs
6308 instr[4,0] = Vd. */
6309
6310 int full = INSTR (30, 30);
6311 unsigned vs = INSTR (9, 5);
6312 unsigned vd = INSTR (4, 0);
6313 unsigned vm = INSTR (20, 16);
6314 unsigned i;
6315
6316 NYI_assert (15, 12, 4);
6317 NYI_assert (10, 10, 0);
6318
6319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6320 switch (INSTR (23, 22))
6321 {
6322 case 1:
6323 {
	unsigned elem;
	uint16_t val;

	if (vm > 15)
	  HALT_NYI;

	/* The index is H:L:M; M (instr[20]) is zero here since vm <= 15.  */
	elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1);
	val = aarch64_get_vec_u16 (cpu, vm, elem);

	for (i = 0; i < (full ? 8 : 4); i++)
	  aarch64_set_vec_u16 (cpu, vd, i,
			       aarch64_get_vec_u16 (cpu, vd, i) -
			       (aarch64_get_vec_u16 (cpu, vs, i) * val));
	return;
6338 }
6339
6340 case 2:
6341 {
	/* The index is H:L.  */
	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_u32 (cpu, vd, i,
			       aarch64_get_vec_u32 (cpu, vd, i) -
			       (aarch64_get_vec_u32 (cpu, vs, i) * val));
6349 return;
6350 }
6351
6352 case 0:
6353 case 3:
6354 default:
6355 HALT_NYI;
6356 }
6357 }
6358
6359 static void
6360 do_vec_SUB (sim_cpu *cpu)
6361 {
6362 /* instr [31] = 0
6363 instr [30] = half(0)/full(1)
6364 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word (10), long (11)
6366 instr [21] = 1
6367 instr [20,16] = Vm
6368 instr [15,10] = 10 0001
6369 instr [9, 5] = Vn
6370 instr [4, 0] = Vd. */
6371
6372 unsigned full = INSTR (30, 30);
6373 unsigned vm = INSTR (20, 16);
6374 unsigned vn = INSTR (9, 5);
6375 unsigned vd = INSTR (4, 0);
6376 unsigned i;
6377
6378 NYI_assert (29, 24, 0x2E);
6379 NYI_assert (21, 21, 1);
6380 NYI_assert (15, 10, 0x21);
6381
6382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6383 switch (INSTR (23, 22))
6384 {
6385 case 0:
6386 for (i = 0; i < (full ? 16 : 8); i++)
6387 aarch64_set_vec_s8 (cpu, vd, i,
6388 aarch64_get_vec_s8 (cpu, vn, i)
6389 - aarch64_get_vec_s8 (cpu, vm, i));
6390 return;
6391
6392 case 1:
6393 for (i = 0; i < (full ? 8 : 4); i++)
6394 aarch64_set_vec_s16 (cpu, vd, i,
6395 aarch64_get_vec_s16 (cpu, vn, i)
6396 - aarch64_get_vec_s16 (cpu, vm, i));
6397 return;
6398
6399 case 2:
6400 for (i = 0; i < (full ? 4 : 2); i++)
6401 aarch64_set_vec_s32 (cpu, vd, i,
6402 aarch64_get_vec_s32 (cpu, vn, i)
6403 - aarch64_get_vec_s32 (cpu, vm, i));
6404 return;
6405
6406 case 3:
6407 if (full == 0)
6408 HALT_UNALLOC;
6409
6410 for (i = 0; i < 2; i++)
6411 aarch64_set_vec_s64 (cpu, vd, i,
6412 aarch64_get_vec_s64 (cpu, vn, i)
6413 - aarch64_get_vec_s64 (cpu, vm, i));
6414 return;
6415 }
6416 }
6417
6418 static void
6419 do_vec_MLS (sim_cpu *cpu)
6420 {
6421 /* instr [31] = 0
6422 instr [30] = half(0)/full(1)
6423 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word (10)
6425 instr [21] = 1
6426 instr [20,16] = Vm
6427 instr [15,10] = 10 0101
6428 instr [9, 5] = Vn
6429 instr [4, 0] = Vd. */
6430
6431 unsigned full = INSTR (30, 30);
6432 unsigned vm = INSTR (20, 16);
6433 unsigned vn = INSTR (9, 5);
6434 unsigned vd = INSTR (4, 0);
6435 unsigned i;
6436
6437 NYI_assert (29, 24, 0x2E);
6438 NYI_assert (21, 21, 1);
6439 NYI_assert (15, 10, 0x25);
6440
6441 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6442 switch (INSTR (23, 22))
6443 {
6444 case 0:
6445 for (i = 0; i < (full ? 16 : 8); i++)
6446 aarch64_set_vec_u8 (cpu, vd, i,
6447 aarch64_get_vec_u8 (cpu, vd, i)
6448 - (aarch64_get_vec_u8 (cpu, vn, i)
6449 * aarch64_get_vec_u8 (cpu, vm, i)));
6450 return;
6451
6452 case 1:
6453 for (i = 0; i < (full ? 8 : 4); i++)
6454 aarch64_set_vec_u16 (cpu, vd, i,
6455 aarch64_get_vec_u16 (cpu, vd, i)
6456 - (aarch64_get_vec_u16 (cpu, vn, i)
6457 * aarch64_get_vec_u16 (cpu, vm, i)));
6458 return;
6459
6460 case 2:
6461 for (i = 0; i < (full ? 4 : 2); i++)
6462 aarch64_set_vec_u32 (cpu, vd, i,
6463 aarch64_get_vec_u32 (cpu, vd, i)
6464 - (aarch64_get_vec_u32 (cpu, vn, i)
6465 * aarch64_get_vec_u32 (cpu, vm, i)));
6466 return;
6467
6468 default:
6469 HALT_UNALLOC;
6470 }
6471 }
6472
6473 static void
6474 do_vec_FDIV (sim_cpu *cpu)
6475 {
6476 /* instr [31] = 0
6477 instr [30] = half(0)/full(1)
6478 instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
6480 instr [21] = 1
6481 instr [20,16] = Vm
6482 instr [15,10] = 1111 11
6483 instr [9, 5] = Vn
6484 instr [4, 0] = Vd. */
6485
6486 unsigned full = INSTR (30, 30);
6487 unsigned vm = INSTR (20, 16);
6488 unsigned vn = INSTR (9, 5);
6489 unsigned vd = INSTR (4, 0);
6490 unsigned i;
6491
6492 NYI_assert (29, 23, 0x5C);
6493 NYI_assert (21, 21, 1);
6494 NYI_assert (15, 10, 0x3F);
6495
6496 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6497 if (INSTR (22, 22))
6498 {
6499 if (! full)
6500 HALT_UNALLOC;
6501
6502 for (i = 0; i < 2; i++)
6503 aarch64_set_vec_double (cpu, vd, i,
6504 aarch64_get_vec_double (cpu, vn, i)
6505 / aarch64_get_vec_double (cpu, vm, i));
6506 }
6507 else
6508 for (i = 0; i < (full ? 4 : 2); i++)
6509 aarch64_set_vec_float (cpu, vd, i,
6510 aarch64_get_vec_float (cpu, vn, i)
6511 / aarch64_get_vec_float (cpu, vm, i));
6512 }
6513
6514 static void
6515 do_vec_FMUL (sim_cpu *cpu)
6516 {
6517 /* instr [31] = 0
6518 instr [30] = half(0)/full(1)
6519 instr [29,23] = 10 1110 0
6520 instr [22] = float(0)/double(1)
6521 instr [21] = 1
6522 instr [20,16] = Vm
6523 instr [15,10] = 1101 11
6524 instr [9, 5] = Vn
6525 instr [4, 0] = Vd. */
6526
6527 unsigned full = INSTR (30, 30);
6528 unsigned vm = INSTR (20, 16);
6529 unsigned vn = INSTR (9, 5);
6530 unsigned vd = INSTR (4, 0);
6531 unsigned i;
6532
6533 NYI_assert (29, 23, 0x5C);
6534 NYI_assert (21, 21, 1);
6535 NYI_assert (15, 10, 0x37);
6536
6537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6538 if (INSTR (22, 22))
6539 {
6540 if (! full)
6541 HALT_UNALLOC;
6542
6543 for (i = 0; i < 2; i++)
6544 aarch64_set_vec_double (cpu, vd, i,
6545 aarch64_get_vec_double (cpu, vn, i)
6546 * aarch64_get_vec_double (cpu, vm, i));
6547 }
6548 else
6549 for (i = 0; i < (full ? 4 : 2); i++)
6550 aarch64_set_vec_float (cpu, vd, i,
6551 aarch64_get_vec_float (cpu, vn, i)
6552 * aarch64_get_vec_float (cpu, vm, i));
6553 }
6554
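/* FADDP is a pairwise add: conceptually the source is the
   concatenation Vn:Vm and adjacent element pairs are summed, so for
   the full 4S form (a sketch of the behaviour implemented below):

     d[0] = n[0] + n[1];  d[1] = n[2] + n[3];
     d[2] = m[0] + m[1];  d[3] = m[2] + m[3];  */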
6555 static void
6556 do_vec_FADDP (sim_cpu *cpu)
6557 {
6558 /* instr [31] = 0
6559 instr [30] = half(0)/full(1)
6560 instr [29,23] = 10 1110 0
6561 instr [22] = float(0)/double(1)
6562 instr [21] = 1
6563 instr [20,16] = Vm
6564 instr [15,10] = 1101 01
6565 instr [9, 5] = Vn
6566 instr [4, 0] = Vd. */
6567
6568 unsigned full = INSTR (30, 30);
6569 unsigned vm = INSTR (20, 16);
6570 unsigned vn = INSTR (9, 5);
6571 unsigned vd = INSTR (4, 0);
6572
6573 NYI_assert (29, 23, 0x5C);
6574 NYI_assert (21, 21, 1);
6575 NYI_assert (15, 10, 0x35);
6576
6577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6578 if (INSTR (22, 22))
6579 {
      /* Extract values before adding them in case vd == vn/vm.  */
6581 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6582 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6583 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6584 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6585
6586 if (! full)
6587 HALT_UNALLOC;
6588
6589 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6590 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6591 }
6592 else
6593 {
      /* Extract values before adding them in case vd == vn/vm.  */
6595 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6596 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6597 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6598 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6599
6600 if (full)
6601 {
6602 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6603 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6604 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6605 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6606
6607 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6608 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6609 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6610 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6611 }
6612 else
6613 {
6614 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6615 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6616 }
6617 }
6618 }
6619
6620 static void
6621 do_vec_FSQRT (sim_cpu *cpu)
6622 {
6623 /* instr[31] = 0
6624 instr[30] = half(0)/full(1)
6625 instr[29,23] = 10 1110 1
6626 instr[22] = single(0)/double(1)
6627 instr[21,10] = 10 0001 1111 10
6628 instr[9,5] = Vsrc
6629 instr[4,0] = Vdest. */
6630
6631 unsigned vn = INSTR (9, 5);
6632 unsigned vd = INSTR (4, 0);
6633 unsigned full = INSTR (30, 30);
6634 int i;
6635
6636 NYI_assert (29, 23, 0x5D);
6637 NYI_assert (21, 10, 0x87E);
6638
6639 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6640 if (INSTR (22, 22))
6641 {
6642 if (! full)
6643 HALT_UNALLOC;
6644
6645 for (i = 0; i < 2; i++)
6646 aarch64_set_vec_double (cpu, vd, i,
6647 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6648 }
6649 else
6650 {
6651 for (i = 0; i < (full ? 4 : 2); i++)
6652 aarch64_set_vec_float (cpu, vd, i,
6653 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6654 }
6655 }
6656
6657 static void
6658 do_vec_FNEG (sim_cpu *cpu)
6659 {
6660 /* instr[31] = 0
6661 instr[30] = half (0)/full (1)
6662 instr[29,23] = 10 1110 1
6663 instr[22] = single (0)/double (1)
6664 instr[21,10] = 10 0000 1111 10
6665 instr[9,5] = Vsrc
6666 instr[4,0] = Vdest. */
6667
6668 unsigned vn = INSTR (9, 5);
6669 unsigned vd = INSTR (4, 0);
6670 unsigned full = INSTR (30, 30);
6671 int i;
6672
6673 NYI_assert (29, 23, 0x5D);
6674 NYI_assert (21, 10, 0x83E);
6675
6676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6677 if (INSTR (22, 22))
6678 {
6679 if (! full)
6680 HALT_UNALLOC;
6681
6682 for (i = 0; i < 2; i++)
6683 aarch64_set_vec_double (cpu, vd, i,
6684 - aarch64_get_vec_double (cpu, vn, i));
6685 }
6686 else
6687 {
6688 for (i = 0; i < (full ? 4 : 2); i++)
6689 aarch64_set_vec_float (cpu, vd, i,
6690 - aarch64_get_vec_float (cpu, vn, i));
6691 }
6692 }
6693
6694 static void
6695 do_vec_NOT (sim_cpu *cpu)
6696 {
6697 /* instr[31] = 0
6698 instr[30] = half (0)/full (1)
6699 instr[29,10] = 10 1110 0010 0000 0101 10
6700 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6702
6703 unsigned vn = INSTR (9, 5);
6704 unsigned vd = INSTR (4, 0);
6705 unsigned i;
6706 int full = INSTR (30, 30);
6707
6708 NYI_assert (29, 10, 0xB8816);
6709
6710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6711 for (i = 0; i < (full ? 16 : 8); i++)
6712 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6713 }
6714
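/* Count the leading zero bits in the SIZE least significant bits of
   VAL, e.g. clz (0x00f0, 16) == 8; a VAL of zero yields SIZE.  Used
   by do_vec_CLZ below.  */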
6715 static unsigned int
6716 clz (uint64_t val, unsigned size)
6717 {
6718 uint64_t mask = 1;
6719 int count;
6720
6721 mask <<= (size - 1);
6722 count = 0;
6723 do
6724 {
6725 if (val & mask)
6726 break;
6727 mask >>= 1;
6728 count ++;
6729 }
6730 while (mask);
6731
6732 return count;
6733 }
6734
6735 static void
6736 do_vec_CLZ (sim_cpu *cpu)
6737 {
6738 /* instr[31] = 0
6739 instr[30] = half (0)/full (1)
6740 instr[29,24] = 10 1110
6741 instr[23,22] = size
6742 instr[21,10] = 10 0000 0100 10
6743 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6745
6746 unsigned vn = INSTR (9, 5);
6747 unsigned vd = INSTR (4, 0);
6748 unsigned i;
  int full = INSTR (30, 30);
6750
6751 NYI_assert (29, 24, 0x2E);
6752 NYI_assert (21, 10, 0x812);
6753
6754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6755 switch (INSTR (23, 22))
6756 {
6757 case 0:
6758 for (i = 0; i < (full ? 16 : 8); i++)
6759 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6760 break;
6761 case 1:
6762 for (i = 0; i < (full ? 8 : 4); i++)
6763 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6764 break;
6765 case 2:
6766 for (i = 0; i < (full ? 4 : 2); i++)
6767 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6768 break;
6769 case 3:
6770 if (! full)
6771 HALT_UNALLOC;
6772 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6773 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6774 break;
6775 }
6776 }
6777
6778 static void
6779 do_vec_MOV_element (sim_cpu *cpu)
6780 {
6781 /* instr[31,21] = 0110 1110 000
6782 instr[20,16] = size & dest index
6783 instr[15] = 0
6784 instr[14,11] = source index
6785 instr[10] = 1
6786 instr[9,5] = Vs
     instr[4,0]   = Vd.  */
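  /* The position of the lowest set bit in instr[20,16] selects the
     element size decoded below: xxxx1 ==> byte, xxx10 ==> half,
     xx100 ==> word, x1000 ==> doubleword; the bits above it form the
     destination index, e.g. imm5 = 01001 moves a byte to index 4.  */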
6788
6789 unsigned vs = INSTR (9, 5);
6790 unsigned vd = INSTR (4, 0);
6791 unsigned src_index;
6792 unsigned dst_index;
6793
6794 NYI_assert (31, 21, 0x370);
6795 NYI_assert (15, 15, 0);
6796 NYI_assert (10, 10, 1);
6797
6798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6799 if (INSTR (16, 16))
6800 {
6801 /* Move a byte. */
6802 src_index = INSTR (14, 11);
6803 dst_index = INSTR (20, 17);
6804 aarch64_set_vec_u8 (cpu, vd, dst_index,
6805 aarch64_get_vec_u8 (cpu, vs, src_index));
6806 }
6807 else if (INSTR (17, 17))
6808 {
6809 /* Move 16-bits. */
6810 NYI_assert (11, 11, 0);
6811 src_index = INSTR (14, 12);
6812 dst_index = INSTR (20, 18);
6813 aarch64_set_vec_u16 (cpu, vd, dst_index,
6814 aarch64_get_vec_u16 (cpu, vs, src_index));
6815 }
6816 else if (INSTR (18, 18))
6817 {
6818 /* Move 32-bits. */
6819 NYI_assert (12, 11, 0);
6820 src_index = INSTR (14, 13);
6821 dst_index = INSTR (20, 19);
6822 aarch64_set_vec_u32 (cpu, vd, dst_index,
6823 aarch64_get_vec_u32 (cpu, vs, src_index));
6824 }
6825 else
6826 {
6827 NYI_assert (19, 19, 1);
6828 NYI_assert (13, 11, 0);
6829 src_index = INSTR (14, 14);
6830 dst_index = INSTR (20, 20);
6831 aarch64_set_vec_u64 (cpu, vd, dst_index,
6832 aarch64_get_vec_u64 (cpu, vs, src_index));
6833 }
6834 }
6835
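/* REV32: the index XOR below reverses element order within each
   32-bit container, e.g. byte indices 0,1,2,3 map to 3,2,1,0 and
   halfword indices 0,1 map to 1,0.  */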
6836 static void
6837 do_vec_REV32 (sim_cpu *cpu)
6838 {
6839 /* instr[31] = 0
6840 instr[30] = full/half
6841 instr[29,24] = 10 1110
6842 instr[23,22] = size
6843 instr[21,10] = 10 0000 0000 10
6844 instr[9,5] = Rn
6845 instr[4,0] = Rd. */
6846
6847 unsigned rn = INSTR (9, 5);
6848 unsigned rd = INSTR (4, 0);
6849 unsigned size = INSTR (23, 22);
6850 unsigned full = INSTR (30, 30);
6851 unsigned i;
6852 FRegister val;
6853
6854 NYI_assert (29, 24, 0x2E);
6855 NYI_assert (21, 10, 0x802);
6856
6857 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6858 switch (size)
6859 {
6860 case 0:
6861 for (i = 0; i < (full ? 16 : 8); i++)
6862 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6863 break;
6864
6865 case 1:
6866 for (i = 0; i < (full ? 8 : 4); i++)
6867 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6868 break;
6869
6870 default:
6871 HALT_UNALLOC;
6872 }
6873
6874 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6875 if (full)
6876 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6877 }
6878
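/* EXT: extract a vector from the byte-wise concatenation Vn:Vm.
   E.g. for the full form with index 3 the result is bytes 3..15 of
   Vn followed by bytes 0..2 of Vm.  */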
6879 static void
6880 do_vec_EXT (sim_cpu *cpu)
6881 {
6882 /* instr[31] = 0
6883 instr[30] = full/half
6884 instr[29,21] = 10 1110 000
6885 instr[20,16] = Vm
6886 instr[15] = 0
6887 instr[14,11] = source index
6888 instr[10] = 0
6889 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6891
6892 unsigned vm = INSTR (20, 16);
6893 unsigned vn = INSTR (9, 5);
6894 unsigned vd = INSTR (4, 0);
6895 unsigned src_index = INSTR (14, 11);
6896 unsigned full = INSTR (30, 30);
6897 unsigned i;
6898 unsigned j;
6899 FRegister val;
6900
6901 NYI_assert (31, 21, 0x370);
6902 NYI_assert (15, 15, 0);
6903 NYI_assert (10, 10, 0);
6904
6905 if (!full && (src_index & 0x8))
6906 HALT_UNALLOC;
6907
6908 j = 0;
6909
6910 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6911 for (i = src_index; i < (full ? 16 : 8); i++)
6912 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6913 for (i = 0; i < src_index; i++)
6914 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6915
6916 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6917 if (full)
6918 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6919 }
6920
6921 static void
6922 dexAdvSIMD0 (sim_cpu *cpu)
6923 {
6924 /* instr [28,25] = 0 111. */
6925 if ( INSTR (15, 10) == 0x07
6926 && (INSTR (9, 5) ==
6927 INSTR (20, 16)))
6928 {
6929 if (INSTR (31, 21) == 0x075
6930 || INSTR (31, 21) == 0x275)
6931 {
6932 do_vec_MOV_whole_vector (cpu);
6933 return;
6934 }
6935 }
6936
6937 if (INSTR (29, 19) == 0x1E0)
6938 {
6939 do_vec_MOV_immediate (cpu);
6940 return;
6941 }
6942
6943 if (INSTR (29, 19) == 0x5E0)
6944 {
6945 do_vec_MVNI (cpu);
6946 return;
6947 }
6948
6949 if (INSTR (29, 19) == 0x1C0
6950 || INSTR (29, 19) == 0x1C1)
6951 {
6952 if (INSTR (15, 10) == 0x03)
6953 {
6954 do_vec_DUP_scalar_into_vector (cpu);
6955 return;
6956 }
6957 }
6958
6959 switch (INSTR (29, 24))
6960 {
6961 case 0x0E: do_vec_op1 (cpu); return;
6962 case 0x0F: do_vec_op2 (cpu); return;
6963
6964 case 0x2E:
6965 if (INSTR (21, 21) == 1)
6966 {
6967 switch (INSTR (15, 10))
6968 {
6969 case 0x02:
6970 do_vec_REV32 (cpu);
6971 return;
6972
6973 case 0x07:
6974 switch (INSTR (23, 22))
6975 {
6976 case 0: do_vec_EOR (cpu); return;
6977 case 1: do_vec_BSL (cpu); return;
6978 case 2:
6979 case 3: do_vec_bit (cpu); return;
6980 }
6981 break;
6982
6983 case 0x08: do_vec_sub_long (cpu); return;
6984 case 0x11: do_vec_USHL (cpu); return;
6985 case 0x12: do_vec_CLZ (cpu); return;
6986 case 0x16: do_vec_NOT (cpu); return;
6987 case 0x19: do_vec_max (cpu); return;
6988 case 0x1B: do_vec_min (cpu); return;
6989 case 0x21: do_vec_SUB (cpu); return;
6990 case 0x25: do_vec_MLS (cpu); return;
6991 case 0x31: do_vec_FminmaxNMP (cpu); return;
6992 case 0x35: do_vec_FADDP (cpu); return;
6993 case 0x37: do_vec_FMUL (cpu); return;
6994 case 0x3F: do_vec_FDIV (cpu); return;
6995
6996 case 0x3E:
6997 switch (INSTR (20, 16))
6998 {
6999 case 0x00: do_vec_FNEG (cpu); return;
7000 case 0x01: do_vec_FSQRT (cpu); return;
7001 default: HALT_NYI;
7002 }
7003
7004 case 0x0D:
7005 case 0x0F:
7006 case 0x22:
7007 case 0x23:
7008 case 0x26:
7009 case 0x2A:
7010 case 0x32:
7011 case 0x36:
7012 case 0x39:
7013 case 0x3A:
7014 do_vec_compare (cpu); return;
7015
7016 default:
7017 break;
7018 }
7019 }
7020
7021 if (INSTR (31, 21) == 0x370)
7022 {
7023 if (INSTR (10, 10))
7024 do_vec_MOV_element (cpu);
7025 else
7026 do_vec_EXT (cpu);
7027 return;
7028 }
7029
7030 switch (INSTR (21, 10))
7031 {
7032 case 0x82E: do_vec_neg (cpu); return;
7033 case 0x87E: do_vec_sqrt (cpu); return;
7034 default:
7035 if (INSTR (15, 10) == 0x30)
7036 {
7037 do_vec_mull (cpu);
7038 return;
7039 }
7040 break;
7041 }
7042 break;
7043
7044 case 0x2f:
7045 switch (INSTR (15, 10))
7046 {
7047 case 0x01: do_vec_SSHR_USHR (cpu); return;
7048 case 0x10:
7049 case 0x12: do_vec_mls_indexed (cpu); return;
7050 case 0x29: do_vec_xtl (cpu); return;
7051 default:
7052 HALT_NYI;
7053 }
7054
7055 default:
7056 break;
7057 }
7058
7059 HALT_NYI;
7060 }
7061
7062 /* 3 sources. */
7063
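/* Sign conventions for the four fused multiply variants below,
   matching the usual AArch64 definitions:
     FMADD:  d = a + n * m      FMSUB:  d = a - n * m
     FNMADD: d = -a - n * m     FNMSUB: d = -a + n * m  */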
7064 /* Float multiply add. */
7065 static void
7066 fmadds (sim_cpu *cpu)
7067 {
7068 unsigned sa = INSTR (14, 10);
7069 unsigned sm = INSTR (20, 16);
7070 unsigned sn = INSTR ( 9, 5);
7071 unsigned sd = INSTR ( 4, 0);
7072
7073 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7074 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7075 + aarch64_get_FP_float (cpu, sn)
7076 * aarch64_get_FP_float (cpu, sm));
7077 }
7078
7079 /* Double multiply add. */
7080 static void
7081 fmaddd (sim_cpu *cpu)
7082 {
7083 unsigned sa = INSTR (14, 10);
7084 unsigned sm = INSTR (20, 16);
7085 unsigned sn = INSTR ( 9, 5);
7086 unsigned sd = INSTR ( 4, 0);
7087
7088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7089 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7090 + aarch64_get_FP_double (cpu, sn)
7091 * aarch64_get_FP_double (cpu, sm));
7092 }
7093
7094 /* Float multiply subtract. */
7095 static void
7096 fmsubs (sim_cpu *cpu)
7097 {
7098 unsigned sa = INSTR (14, 10);
7099 unsigned sm = INSTR (20, 16);
7100 unsigned sn = INSTR ( 9, 5);
7101 unsigned sd = INSTR ( 4, 0);
7102
7103 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7104 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7105 - aarch64_get_FP_float (cpu, sn)
7106 * aarch64_get_FP_float (cpu, sm));
7107 }
7108
7109 /* Double multiply subtract. */
7110 static void
7111 fmsubd (sim_cpu *cpu)
7112 {
7113 unsigned sa = INSTR (14, 10);
7114 unsigned sm = INSTR (20, 16);
7115 unsigned sn = INSTR ( 9, 5);
7116 unsigned sd = INSTR ( 4, 0);
7117
7118 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7119 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7120 - aarch64_get_FP_double (cpu, sn)
7121 * aarch64_get_FP_double (cpu, sm));
7122 }
7123
7124 /* Float negative multiply add. */
7125 static void
7126 fnmadds (sim_cpu *cpu)
7127 {
7128 unsigned sa = INSTR (14, 10);
7129 unsigned sm = INSTR (20, 16);
7130 unsigned sn = INSTR ( 9, 5);
7131 unsigned sd = INSTR ( 4, 0);
7132
7133 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7134 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7135 + (- aarch64_get_FP_float (cpu, sn))
7136 * aarch64_get_FP_float (cpu, sm));
7137 }
7138
7139 /* Double negative multiply add. */
7140 static void
7141 fnmaddd (sim_cpu *cpu)
7142 {
7143 unsigned sa = INSTR (14, 10);
7144 unsigned sm = INSTR (20, 16);
7145 unsigned sn = INSTR ( 9, 5);
7146 unsigned sd = INSTR ( 4, 0);
7147
7148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7149 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7150 + (- aarch64_get_FP_double (cpu, sn))
7151 * aarch64_get_FP_double (cpu, sm));
7152 }
7153
7154 /* Float negative multiply subtract. */
7155 static void
7156 fnmsubs (sim_cpu *cpu)
7157 {
7158 unsigned sa = INSTR (14, 10);
7159 unsigned sm = INSTR (20, 16);
7160 unsigned sn = INSTR ( 9, 5);
7161 unsigned sd = INSTR ( 4, 0);
7162
7163 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7164 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7165 + aarch64_get_FP_float (cpu, sn)
7166 * aarch64_get_FP_float (cpu, sm));
7167 }
7168
7169 /* Double negative multiply subtract. */
7170 static void
7171 fnmsubd (sim_cpu *cpu)
7172 {
7173 unsigned sa = INSTR (14, 10);
7174 unsigned sm = INSTR (20, 16);
7175 unsigned sn = INSTR ( 9, 5);
7176 unsigned sd = INSTR ( 4, 0);
7177
7178 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7179 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7180 + aarch64_get_FP_double (cpu, sn)
7181 * aarch64_get_FP_double (cpu, sm));
7182 }
7183
7184 static void
7185 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7186 {
7187 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7188 instr[30] = 0
7189 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7190 instr[28,25] = 1111
7191 instr[24] = 1
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7193 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7194 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7195
7196 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7197 /* dispatch on combined type:o1:o2. */
7198 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7199
7200 if (M_S != 0)
7201 HALT_UNALLOC;
7202
7203 switch (dispatch)
7204 {
7205 case 0: fmadds (cpu); return;
7206 case 1: fmsubs (cpu); return;
7207 case 2: fnmadds (cpu); return;
7208 case 3: fnmsubs (cpu); return;
7209 case 4: fmaddd (cpu); return;
7210 case 5: fmsubd (cpu); return;
7211 case 6: fnmaddd (cpu); return;
7212 case 7: fnmsubd (cpu); return;
7213 default:
7214 /* type > 1 is currently unallocated. */
7215 HALT_UNALLOC;
7216 }
7217 }
7218
7219 static void
7220 dexSimpleFPFixedConvert (sim_cpu *cpu)
7221 {
7222 HALT_NYI;
7223 }
7224
7225 static void
7226 dexSimpleFPCondCompare (sim_cpu *cpu)
7227 {
7228 /* instr [31,23] = 0001 1110 0
7229 instr [22] = type
7230 instr [21] = 1
7231 instr [20,16] = Rm
7232 instr [15,12] = condition
7233 instr [11,10] = 01
7234 instr [9,5] = Rn
7235 instr [4] = 0
7236 instr [3,0] = nzcv */
7237
7238 unsigned rm = INSTR (20, 16);
7239 unsigned rn = INSTR (9, 5);
7240
7241 NYI_assert (31, 23, 0x3C);
7242 NYI_assert (11, 10, 0x1);
7243 NYI_assert (4, 4, 0);
7244
7245 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7246 if (! testConditionCode (cpu, INSTR (15, 12)))
7247 {
7248 aarch64_set_CPSR (cpu, INSTR (3, 0));
7249 return;
7250 }
7251
7252 if (INSTR (22, 22))
7253 {
7254 /* Double precision. */
7255 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7256 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7257
7258 /* FIXME: Check for NaNs. */
7259 if (val1 == val2)
7260 aarch64_set_CPSR (cpu, (Z | C));
7261 else if (val1 < val2)
7262 aarch64_set_CPSR (cpu, N);
7263 else /* val1 > val2 */
7264 aarch64_set_CPSR (cpu, C);
7265 }
7266 else
7267 {
7268 /* Single precision. */
7269 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7270 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7271
7272 /* FIXME: Check for NaNs. */
7273 if (val1 == val2)
7274 aarch64_set_CPSR (cpu, (Z | C));
7275 else if (val1 < val2)
7276 aarch64_set_CPSR (cpu, N);
7277 else /* val1 > val2 */
7278 aarch64_set_CPSR (cpu, C);
7279 }
7280 }
7281
7282 /* 2 sources. */
7283
7284 /* Float add. */
7285 static void
7286 fadds (sim_cpu *cpu)
7287 {
7288 unsigned sm = INSTR (20, 16);
7289 unsigned sn = INSTR ( 9, 5);
7290 unsigned sd = INSTR ( 4, 0);
7291
7292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7293 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7294 + aarch64_get_FP_float (cpu, sm));
7295 }
7296
7297 /* Double add. */
7298 static void
7299 faddd (sim_cpu *cpu)
7300 {
7301 unsigned sm = INSTR (20, 16);
7302 unsigned sn = INSTR ( 9, 5);
7303 unsigned sd = INSTR ( 4, 0);
7304
7305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7306 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7307 + aarch64_get_FP_double (cpu, sm));
7308 }
7309
7310 /* Float divide. */
7311 static void
7312 fdivs (sim_cpu *cpu)
7313 {
7314 unsigned sm = INSTR (20, 16);
7315 unsigned sn = INSTR ( 9, 5);
7316 unsigned sd = INSTR ( 4, 0);
7317
7318 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7319 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7320 / aarch64_get_FP_float (cpu, sm));
7321 }
7322
7323 /* Double divide. */
7324 static void
7325 fdivd (sim_cpu *cpu)
7326 {
7327 unsigned sm = INSTR (20, 16);
7328 unsigned sn = INSTR ( 9, 5);
7329 unsigned sd = INSTR ( 4, 0);
7330
7331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7332 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7333 / aarch64_get_FP_double (cpu, sm));
7334 }
7335
7336 /* Float multiply. */
7337 static void
7338 fmuls (sim_cpu *cpu)
7339 {
7340 unsigned sm = INSTR (20, 16);
7341 unsigned sn = INSTR ( 9, 5);
7342 unsigned sd = INSTR ( 4, 0);
7343
7344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7345 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7346 * aarch64_get_FP_float (cpu, sm));
7347 }
7348
7349 /* Double multiply. */
7350 static void
7351 fmuld (sim_cpu *cpu)
7352 {
7353 unsigned sm = INSTR (20, 16);
7354 unsigned sn = INSTR ( 9, 5);
7355 unsigned sd = INSTR ( 4, 0);
7356
7357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7358 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7359 * aarch64_get_FP_double (cpu, sm));
7360 }
7361
7362 /* Float negate and multiply. */
7363 static void
7364 fnmuls (sim_cpu *cpu)
7365 {
7366 unsigned sm = INSTR (20, 16);
7367 unsigned sn = INSTR ( 9, 5);
7368 unsigned sd = INSTR ( 4, 0);
7369
7370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7371 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7372 * aarch64_get_FP_float (cpu, sm)));
7373 }
7374
7375 /* Double negate and multiply. */
7376 static void
7377 fnmuld (sim_cpu *cpu)
7378 {
7379 unsigned sm = INSTR (20, 16);
7380 unsigned sn = INSTR ( 9, 5);
7381 unsigned sd = INSTR ( 4, 0);
7382
7383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7384 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7385 * aarch64_get_FP_double (cpu, sm)));
7386 }
7387
7388 /* Float subtract. */
7389 static void
7390 fsubs (sim_cpu *cpu)
7391 {
7392 unsigned sm = INSTR (20, 16);
7393 unsigned sn = INSTR ( 9, 5);
7394 unsigned sd = INSTR ( 4, 0);
7395
7396 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7397 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7398 - aarch64_get_FP_float (cpu, sm));
7399 }
7400
7401 /* Double subtract. */
7402 static void
7403 fsubd (sim_cpu *cpu)
7404 {
7405 unsigned sm = INSTR (20, 16);
7406 unsigned sn = INSTR ( 9, 5);
7407 unsigned sd = INSTR ( 4, 0);
7408
7409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7410 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7411 - aarch64_get_FP_double (cpu, sm));
7412 }
7413
7414 static void
7415 do_FMINNM (sim_cpu *cpu)
7416 {
7417 /* instr[31,23] = 0 0011 1100
7418 instr[22] = float(0)/double(1)
7419 instr[21] = 1
7420 instr[20,16] = Sm
7421 instr[15,10] = 01 1110
7422 instr[9,5] = Sn
     instr[4,0]   = Sd */
7424
7425 unsigned sm = INSTR (20, 16);
7426 unsigned sn = INSTR ( 9, 5);
7427 unsigned sd = INSTR ( 4, 0);
7428
7429 NYI_assert (31, 23, 0x03C);
7430 NYI_assert (15, 10, 0x1E);
7431
7432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7433 if (INSTR (22, 22))
7434 aarch64_set_FP_double (cpu, sd,
7435 dminnm (aarch64_get_FP_double (cpu, sn),
7436 aarch64_get_FP_double (cpu, sm)));
7437 else
7438 aarch64_set_FP_float (cpu, sd,
7439 fminnm (aarch64_get_FP_float (cpu, sn),
7440 aarch64_get_FP_float (cpu, sm)));
7441 }
7442
7443 static void
7444 do_FMAXNM (sim_cpu *cpu)
7445 {
7446 /* instr[31,23] = 0 0011 1100
7447 instr[22] = float(0)/double(1)
7448 instr[21] = 1
7449 instr[20,16] = Sm
7450 instr[15,10] = 01 1010
7451 instr[9,5] = Sn
     instr[4,0]   = Sd */
7453
7454 unsigned sm = INSTR (20, 16);
7455 unsigned sn = INSTR ( 9, 5);
7456 unsigned sd = INSTR ( 4, 0);
7457
7458 NYI_assert (31, 23, 0x03C);
7459 NYI_assert (15, 10, 0x1A);
7460
7461 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7462 if (INSTR (22, 22))
7463 aarch64_set_FP_double (cpu, sd,
7464 dmaxnm (aarch64_get_FP_double (cpu, sn),
7465 aarch64_get_FP_double (cpu, sm)));
7466 else
7467 aarch64_set_FP_float (cpu, sd,
7468 fmaxnm (aarch64_get_FP_float (cpu, sn),
7469 aarch64_get_FP_float (cpu, sm)));
7470 }
7471
7472 static void
7473 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7474 {
7475 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7476 instr[30] = 0
7477 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7478 instr[28,25] = 1111
7479 instr[24] = 0
     instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7481 instr[21] = 1
7482 instr[20,16] = Vm
7483 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7484 0010 ==> FADD, 0011 ==> FSUB,
7485 0100 ==> FMAX, 0101 ==> FMIN
7486 0110 ==> FMAXNM, 0111 ==> FMINNM
7487 1000 ==> FNMUL, ow ==> UNALLOC
7488 instr[11,10] = 10
7489 instr[9,5] = Vn
7490 instr[4,0] = Vd */
7491
7492 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7493 uint32_t type = INSTR (23, 22);
7494 /* Dispatch on opcode. */
7495 uint32_t dispatch = INSTR (15, 12);
7496
7497 if (type > 1)
7498 HALT_UNALLOC;
7499
7500 if (M_S != 0)
7501 HALT_UNALLOC;
7502
7503 if (type)
7504 switch (dispatch)
7505 {
7506 case 0: fmuld (cpu); return;
7507 case 1: fdivd (cpu); return;
7508 case 2: faddd (cpu); return;
7509 case 3: fsubd (cpu); return;
7510 case 6: do_FMAXNM (cpu); return;
7511 case 7: do_FMINNM (cpu); return;
7512 case 8: fnmuld (cpu); return;
7513
7514 /* Have not yet implemented fmax and fmin. */
7515 case 4:
7516 case 5:
7517 HALT_NYI;
7518
7519 default:
7520 HALT_UNALLOC;
7521 }
7522 else /* type == 0 => floats. */
7523 switch (dispatch)
7524 {
7525 case 0: fmuls (cpu); return;
7526 case 1: fdivs (cpu); return;
7527 case 2: fadds (cpu); return;
7528 case 3: fsubs (cpu); return;
7529 case 6: do_FMAXNM (cpu); return;
7530 case 7: do_FMINNM (cpu); return;
7531 case 8: fnmuls (cpu); return;
7532
7533 case 4:
7534 case 5:
7535 HALT_NYI;
7536
7537 default:
7538 HALT_UNALLOC;
7539 }
7540 }
7541
7542 static void
7543 dexSimpleFPCondSelect (sim_cpu *cpu)
7544 {
7545 /* FCSEL
7546 instr[31,23] = 0 0011 1100
7547 instr[22] = 0=>single 1=>double
7548 instr[21] = 1
7549 instr[20,16] = Sm
7550 instr[15,12] = cond
7551 instr[11,10] = 11
7552 instr[9,5] = Sn
     instr[4,0]   = Sd */
7554 unsigned sm = INSTR (20, 16);
7555 unsigned sn = INSTR ( 9, 5);
7556 unsigned sd = INSTR ( 4, 0);
7557 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7558
7559 NYI_assert (31, 23, 0x03C);
7560 NYI_assert (11, 10, 0x3);
7561
7562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7563 if (INSTR (22, 22))
7564 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7565 : aarch64_get_FP_double (cpu, sm)));
7566 else
7567 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7568 : aarch64_get_FP_float (cpu, sm)));
7569 }
7570
7571 /* Store 32 bit unscaled signed 9 bit. */
7572 static void
7573 fsturs (sim_cpu *cpu, int32_t offset)
7574 {
7575 unsigned int rn = INSTR (9, 5);
7576 unsigned int st = INSTR (4, 0);
7577
7578 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7580 aarch64_get_vec_u32 (cpu, st, 0));
7581 }
7582
7583 /* Store 64 bit unscaled signed 9 bit. */
7584 static void
7585 fsturd (sim_cpu *cpu, int32_t offset)
7586 {
7587 unsigned int rn = INSTR (9, 5);
7588 unsigned int st = INSTR (4, 0);
7589
7590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7592 aarch64_get_vec_u64 (cpu, st, 0));
7593 }
7594
7595 /* Store 128 bit unscaled signed 9 bit. */
7596 static void
7597 fsturq (sim_cpu *cpu, int32_t offset)
7598 {
7599 unsigned int rn = INSTR (9, 5);
7600 unsigned int st = INSTR (4, 0);
7601 FRegister a;
7602
7603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7604 aarch64_get_FP_long_double (cpu, st, & a);
7605 aarch64_set_mem_long_double (cpu,
			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7607 + offset, a);
7608 }
7609
7610 /* TODO FP move register. */
7611
7612 /* 32 bit fp to fp move register. */
7613 static void
7614 ffmovs (sim_cpu *cpu)
7615 {
7616 unsigned int rn = INSTR (9, 5);
7617 unsigned int st = INSTR (4, 0);
7618
7619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7620 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7621 }
7622
7623 /* 64 bit fp to fp move register. */
7624 static void
7625 ffmovd (sim_cpu *cpu)
7626 {
7627 unsigned int rn = INSTR (9, 5);
7628 unsigned int st = INSTR (4, 0);
7629
7630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7631 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7632 }
7633
7634 /* 32 bit GReg to Vec move register. */
7635 static void
7636 fgmovs (sim_cpu *cpu)
7637 {
7638 unsigned int rn = INSTR (9, 5);
7639 unsigned int st = INSTR (4, 0);
7640
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7643 }
7644
7645 /* 64 bit g to fp move register. */
7646 static void
7647 fgmovd (sim_cpu *cpu)
7648 {
7649 unsigned int rn = INSTR (9, 5);
7650 unsigned int st = INSTR (4, 0);
7651
7652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7653 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7654 }
7655
7656 /* 32 bit fp to g move register. */
7657 static void
7658 gfmovs (sim_cpu *cpu)
7659 {
7660 unsigned int rn = INSTR (9, 5);
7661 unsigned int st = INSTR (4, 0);
7662
7663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7664 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7665 }
7666
7667 /* 64 bit fp to g move register. */
7668 static void
7669 gfmovd (sim_cpu *cpu)
7670 {
7671 unsigned int rn = INSTR (9, 5);
7672 unsigned int st = INSTR (4, 0);
7673
7674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7675 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7676 }
7677
7678 /* FP move immediate
7679
7680 These install an immediate 8 bit value in the target register
7681 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7682 bit exponent. */
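/* A sketch of that expansion, assuming the standard VFPExpandImm
   mapping (fp_immediate_for_encoding_32/_64 are expected to
   implement the same rule).  With imm8 = s:b:c:d:efgh

     value = (-1)^s * (1 + efgh/16) * 2^(UInt (NOT (b):c:d) - 3)

   so for example imm8 == 0x70 encodes 1.0 and imm8 == 0x00
   encodes 2.0.  */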
7683
7684 static void
7685 fmovs (sim_cpu *cpu)
7686 {
7687 unsigned int sd = INSTR (4, 0);
7688 uint32_t imm = INSTR (20, 13);
7689 float f = fp_immediate_for_encoding_32 (imm);
7690
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7692 aarch64_set_FP_float (cpu, sd, f);
7693 }
7694
7695 static void
7696 fmovd (sim_cpu *cpu)
7697 {
7698 unsigned int sd = INSTR (4, 0);
7699 uint32_t imm = INSTR (20, 13);
7700 double d = fp_immediate_for_encoding_64 (imm);
7701
7702 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7703 aarch64_set_FP_double (cpu, sd, d);
7704 }
7705
7706 static void
7707 dexSimpleFPImmediate (sim_cpu *cpu)
7708 {
  /* instr[31,23] == 0001 1110 0
7710 instr[22] == type : single(0)/double(1)
7711 instr[21] == 1
7712 instr[20,13] == imm8
7713 instr[12,10] == 100
     instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7715 instr[4,0] == Rd */
7716 uint32_t imm5 = INSTR (9, 5);
7717
7718 NYI_assert (31, 23, 0x3C);
7719
7720 if (imm5 != 0)
7721 HALT_UNALLOC;
7722
7723 if (INSTR (22, 22))
7724 fmovd (cpu);
7725 else
7726 fmovs (cpu);
7727 }
7728
7729 /* TODO specific decode and execute for group Load Store. */
7730
7731 /* TODO FP load/store single register (unscaled offset). */
7732
7733 /* TODO load 8 bit unscaled signed 9 bit. */
7734 /* TODO load 16 bit unscaled signed 9 bit. */
7735
7736 /* Load 32 bit unscaled signed 9 bit. */
7737 static void
7738 fldurs (sim_cpu *cpu, int32_t offset)
7739 {
7740 unsigned int rn = INSTR (9, 5);
7741 unsigned int st = INSTR (4, 0);
7742
7743 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7744 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7745 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7746 }
7747
7748 /* Load 64 bit unscaled signed 9 bit. */
7749 static void
7750 fldurd (sim_cpu *cpu, int32_t offset)
7751 {
7752 unsigned int rn = INSTR (9, 5);
7753 unsigned int st = INSTR (4, 0);
7754
7755 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7756 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7757 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7758 }
7759
7760 /* Load 128 bit unscaled signed 9 bit. */
7761 static void
7762 fldurq (sim_cpu *cpu, int32_t offset)
7763 {
7764 unsigned int rn = INSTR (9, 5);
7765 unsigned int st = INSTR (4, 0);
7766 FRegister a;
7767 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7768
7769 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7770 aarch64_get_mem_long_double (cpu, addr, & a);
7771 aarch64_set_FP_long_double (cpu, st, a);
7772 }
7773
7774 /* TODO store 8 bit unscaled signed 9 bit. */
7775 /* TODO store 16 bit unscaled signed 9 bit. */
7776
7777
7778 /* 1 source. */
7779
7780 /* Float absolute value. */
7781 static void
7782 fabss (sim_cpu *cpu)
7783 {
7784 unsigned sn = INSTR (9, 5);
7785 unsigned sd = INSTR (4, 0);
7786 float value = aarch64_get_FP_float (cpu, sn);
7787
7788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7789 aarch64_set_FP_float (cpu, sd, fabsf (value));
7790 }
7791
7792 /* Double absolute value. */
7793 static void
7794 fabcpu (sim_cpu *cpu)
7795 {
7796 unsigned sn = INSTR (9, 5);
7797 unsigned sd = INSTR (4, 0);
7798 double value = aarch64_get_FP_double (cpu, sn);
7799
7800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7801 aarch64_set_FP_double (cpu, sd, fabs (value));
7802 }
7803
7804 /* Float negative value. */
7805 static void
7806 fnegs (sim_cpu *cpu)
7807 {
7808 unsigned sn = INSTR (9, 5);
7809 unsigned sd = INSTR (4, 0);
7810
7811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7812 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7813 }
7814
7815 /* Double negative value. */
7816 static void
7817 fnegd (sim_cpu *cpu)
7818 {
7819 unsigned sn = INSTR (9, 5);
7820 unsigned sd = INSTR (4, 0);
7821
7822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7823 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7824 }
7825
7826 /* Float square root. */
7827 static void
7828 fsqrts (sim_cpu *cpu)
7829 {
7830 unsigned sn = INSTR (9, 5);
7831 unsigned sd = INSTR (4, 0);
7832
7833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7834 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7835 }
7836
7837 /* Double square root. */
7838 static void
7839 fsqrtd (sim_cpu *cpu)
7840 {
7841 unsigned sn = INSTR (9, 5);
7842 unsigned sd = INSTR (4, 0);
7843
7844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7845 aarch64_set_FP_double (cpu, sd,
7846 sqrt (aarch64_get_FP_double (cpu, sn)));
7847 }
7848
7849 /* Convert double to float. */
7850 static void
7851 fcvtds (sim_cpu *cpu)
7852 {
7853 unsigned sn = INSTR (9, 5);
7854 unsigned sd = INSTR (4, 0);
7855
7856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7857 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7858 }
7859
7860 /* Convert float to double. */
7861 static void
7862 fcvtcpu (sim_cpu *cpu)
7863 {
7864 unsigned sn = INSTR (9, 5);
7865 unsigned sd = INSTR (4, 0);
7866
7867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7868 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7869 }
7870
7871 static void
7872 do_FRINT (sim_cpu *cpu)
7873 {
7874 /* instr[31,23] = 0001 1110 0
7875 instr[22] = single(0)/double(1)
7876 instr[21,18] = 1001
7877 instr[17,15] = rounding mode
7878 instr[14,10] = 10000
7879 instr[9,5] = source
7880 instr[4,0] = dest */
7881
7882 float val;
7883 unsigned rs = INSTR (9, 5);
7884 unsigned rd = INSTR (4, 0);
7885 unsigned int rmode = INSTR (17, 15);
7886
7887 NYI_assert (31, 23, 0x03C);
7888 NYI_assert (21, 18, 0x9);
7889 NYI_assert (14, 10, 0x10);
7890
7891 if (rmode == 6 || rmode == 7)
7892 /* FIXME: Add support for rmode == 6 exactness check. */
7893 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7894
7895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7896 if (INSTR (22, 22))
7897 {
7898 double val = aarch64_get_FP_double (cpu, rs);
7899
7900 switch (rmode)
7901 {
	case 0: /* mode N: nearest, ties to even.  */
	  {
	    double rval = round (val);

	    /* round () breaks ties away from zero; steer halfway cases
	       to the even neighbour instead, e.g. 2.5 -> 2.0 and
	       -2.5 -> -2.0.  */
	    if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
	      rval += (val > rval) ? 1.0 : -1.0;

	    aarch64_set_FP_double (cpu, rd, rval);
	    return;
	  }
7915
7916 case 1: /* mode P: towards +inf. */
7917 if (val < 0.0)
7918 aarch64_set_FP_double (cpu, rd, trunc (val));
7919 else
7920 aarch64_set_FP_double (cpu, rd, round (val));
7921 return;
7922
7923 case 2: /* mode M: towards -inf. */
7924 if (val < 0.0)
7925 aarch64_set_FP_double (cpu, rd, round (val));
7926 else
7927 aarch64_set_FP_double (cpu, rd, trunc (val));
7928 return;
7929
7930 case 3: /* mode Z: towards 0. */
7931 aarch64_set_FP_double (cpu, rd, trunc (val));
7932 return;
7933
7934 case 4: /* mode A: away from 0. */
7935 aarch64_set_FP_double (cpu, rd, round (val));
7936 return;
7937
7938 case 6: /* mode X: use FPCR with exactness check. */
7939 case 7: /* mode I: use FPCR mode. */
7940 HALT_NYI;
7941
7942 default:
7943 HALT_UNALLOC;
7944 }
7945 }
7946
7947 val = aarch64_get_FP_float (cpu, rs);
7948
7949 switch (rmode)
7950 {
    case 0: /* mode N: nearest, ties to even.  */
      {
	float rval = roundf (val);

	/* As above: roundf () breaks ties away from zero, so steer
	   halfway cases to the even neighbour, e.g. 2.5f -> 2.0f.  */
	if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
	  rval += (val > rval) ? 1.0f : -1.0f;

	aarch64_set_FP_float (cpu, rd, rval);
	return;
      }
7964
7965 case 1: /* mode P: towards +inf. */
7966 if (val < 0.0)
7967 aarch64_set_FP_float (cpu, rd, truncf (val));
7968 else
7969 aarch64_set_FP_float (cpu, rd, roundf (val));
7970 return;
7971
7972 case 2: /* mode M: towards -inf. */
7973 if (val < 0.0)
7974 aarch64_set_FP_float (cpu, rd, truncf (val));
7975 else
7976 aarch64_set_FP_float (cpu, rd, roundf (val));
7977 return;
7978
7979 case 3: /* mode Z: towards 0. */
7980 aarch64_set_FP_float (cpu, rd, truncf (val));
7981 return;
7982
7983 case 4: /* mode A: away from 0. */
7984 aarch64_set_FP_float (cpu, rd, roundf (val));
7985 return;
7986
7987 case 6: /* mode X: use FPCR with exactness check. */
7988 case 7: /* mode I: use FPCR mode. */
7989 HALT_NYI;
7990
7991 default:
7992 HALT_UNALLOC;
7993 }
7994 }
7995
7996 /* Convert half to float. */
7997 static void
7998 do_FCVT_half_to_single (sim_cpu *cpu)
7999 {
8000 unsigned rn = INSTR (9, 5);
8001 unsigned rd = INSTR (4, 0);
8002
8003 NYI_assert (31, 10, 0x7B890);
8004
8005 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8006 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8007 }
8008
8009 /* Convert half to double. */
8010 static void
8011 do_FCVT_half_to_double (sim_cpu *cpu)
8012 {
8013 unsigned rn = INSTR (9, 5);
8014 unsigned rd = INSTR (4, 0);
8015
8016 NYI_assert (31, 10, 0x7B8B0);
8017
8018 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8019 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8020 }
8021
8022 static void
8023 do_FCVT_single_to_half (sim_cpu *cpu)
8024 {
8025 unsigned rn = INSTR (9, 5);
8026 unsigned rd = INSTR (4, 0);
8027
8028 NYI_assert (31, 10, 0x788F0);
8029
8030 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8031 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8032 }
8033
8034 /* Convert double to half. */
8035 static void
8036 do_FCVT_double_to_half (sim_cpu *cpu)
8037 {
8038 unsigned rn = INSTR (9, 5);
8039 unsigned rd = INSTR (4, 0);
8040
8041 NYI_assert (31, 10, 0x798F0);
8042
8043 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8044 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8045 }
8046
8047 static void
8048 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8049 {
8050 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8051 instr[30] = 0
8052 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8053 instr[28,25] = 1111
8054 instr[24] = 0
8055 instr[23,22] ==> type : 00 ==> source is single,
8056 01 ==> source is double
8057 10 ==> UNALLOC
8058 11 ==> UNALLOC or source is half
8059 instr[21] = 1
8060 instr[20,15] ==> opcode : with type 00 or 01
8061 000000 ==> FMOV, 000001 ==> FABS,
8062 000010 ==> FNEG, 000011 ==> FSQRT,
8063 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8064 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8065 001000 ==> FRINTN, 001001 ==> FRINTP,
8066 001010 ==> FRINTM, 001011 ==> FRINTZ,
8067 001100 ==> FRINTA, 001101 ==> UNALLOC
8068 001110 ==> FRINTX, 001111 ==> FRINTI
8069 with type 11
8070 000100 ==> FCVT (half-to-single)
8071 000101 ==> FCVT (half-to-double)
8072 instr[14,10] = 10000. */
8073
8074 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8075 uint32_t type = INSTR (23, 22);
8076 uint32_t opcode = INSTR (20, 15);
8077
8078 if (M_S != 0)
8079 HALT_UNALLOC;
8080
8081 if (type == 3)
8082 {
8083 if (opcode == 4)
8084 do_FCVT_half_to_single (cpu);
8085 else if (opcode == 5)
8086 do_FCVT_half_to_double (cpu);
8087 else
8088 HALT_UNALLOC;
8089 return;
8090 }
8091
8092 if (type == 2)
8093 HALT_UNALLOC;
8094
8095 switch (opcode)
8096 {
8097 case 0:
8098 if (type)
8099 ffmovd (cpu);
8100 else
8101 ffmovs (cpu);
8102 return;
8103
8104 case 1:
8105 if (type)
8106 fabcpu (cpu);
8107 else
8108 fabss (cpu);
8109 return;
8110
8111 case 2:
8112 if (type)
8113 fnegd (cpu);
8114 else
8115 fnegs (cpu);
8116 return;
8117
8118 case 3:
8119 if (type)
8120 fsqrtd (cpu);
8121 else
8122 fsqrts (cpu);
8123 return;
8124
8125 case 4:
8126 if (type)
8127 fcvtds (cpu);
8128 else
8129 HALT_UNALLOC;
8130 return;
8131
8132 case 5:
8133 if (type)
8134 HALT_UNALLOC;
8135 fcvtcpu (cpu);
8136 return;
8137
8138 case 8: /* FRINTN etc. */
8139 case 9:
8140 case 10:
8141 case 11:
8142 case 12:
8143 case 14:
8144 case 15:
8145 do_FRINT (cpu);
8146 return;
8147
8148 case 7:
8149 if (INSTR (22, 22))
8150 do_FCVT_double_to_half (cpu);
8151 else
8152 do_FCVT_single_to_half (cpu);
8153 return;
8154
8155 case 13:
8156 HALT_NYI;
8157
8158 default:
8159 HALT_UNALLOC;
8160 }
8161 }
8162
8163 /* 32 bit signed int to float. */
8164 static void
8165 scvtf32 (sim_cpu *cpu)
8166 {
8167 unsigned rn = INSTR (9, 5);
8168 unsigned sd = INSTR (4, 0);
8169
8170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8171 aarch64_set_FP_float
8172 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8173 }
8174
8175 /* signed int to float. */
8176 static void
8177 scvtf (sim_cpu *cpu)
8178 {
8179 unsigned rn = INSTR (9, 5);
8180 unsigned sd = INSTR (4, 0);
8181
8182 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8183 aarch64_set_FP_float
8184 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8185 }
8186
8187 /* 32 bit signed int to double. */
8188 static void
8189 scvtd32 (sim_cpu *cpu)
8190 {
8191 unsigned rn = INSTR (9, 5);
8192 unsigned sd = INSTR (4, 0);
8193
8194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8195 aarch64_set_FP_double
8196 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8197 }
8198
8199 /* signed int to double. */
8200 static void
8201 scvtd (sim_cpu *cpu)
8202 {
8203 unsigned rn = INSTR (9, 5);
8204 unsigned sd = INSTR (4, 0);
8205
8206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8207 aarch64_set_FP_double
8208 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8209 }
8210
8211 static const float FLOAT_INT_MAX = (float) INT_MAX;
8212 static const float FLOAT_INT_MIN = (float) INT_MIN;
8213 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8214 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8215 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8216 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8217 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8218 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8219
8220 #define UINT_MIN 0
8221 #define ULONG_MIN 0
8222 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8223 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8224 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8225 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8226 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8227 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8228 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8229 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
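
/* Several of these bounds are not exactly representable: e.g.
   (float) INT_MAX rounds up to 2^31 and (double) LONG_MAX to 2^63,
   so the >= and <= saturation tests in RAISE_EXCEPTIONS below also
   catch the first values that are strictly out of range.  */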
8230
8231 /* Check for FP exception conditions:
8232 NaN raises IO
8233 Infinity raises IO
8234 Out of Range raises IO and IX and saturates value
8235 Denormal raises ID and IX and sets to zero. */
8236 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8237 do \
8238 { \
8239 switch (fpclassify (F)) \
8240 { \
8241 case FP_INFINITE: \
8242 case FP_NAN: \
      aarch64_set_FPSR_bits (cpu, IO, IO);			\
      if (signbit (F))						\
	VALUE = ITYPE##_MIN;					\
      else							\
	VALUE = ITYPE##_MAX;					\
8248 break; \
8249 \
8250 case FP_NORMAL: \
8251 if (F >= FTYPE##_##ITYPE##_MAX) \
8252 { \
8253 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8254 VALUE = ITYPE##_MAX; \
8255 } \
8256 else if (F <= FTYPE##_##ITYPE##_MIN) \
8257 { \
8258 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8259 VALUE = ITYPE##_MIN; \
8260 } \
8261 break; \
8262 \
8263 case FP_SUBNORMAL: \
8264 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8265 VALUE = 0; \
8266 break; \
8267 \
8268 default: \
8269 case FP_ZERO: \
8270 VALUE = 0; \
8271 break; \
8272 } \
8273 } \
8274 while (0)
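
/* A worked example of the saturation above: converting 1.0e20f to a
   32-bit signed integer takes the FP_NORMAL arm, 1.0e20f >=
   FLOAT_INT_MAX, so the result saturates to INT_MAX with IO and IX
   raised; a subnormal input such as 1.0e-40f is flushed to 0 with IX
   and ID raised.  */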
8275
8276 /* 32 bit convert float to signed int truncate towards zero. */
8277 static void
8278 fcvtszs32 (sim_cpu *cpu)
8279 {
8280 unsigned sn = INSTR (9, 5);
8281 unsigned rd = INSTR (4, 0);
8282 /* TODO : check that this rounds toward zero. */
8283 float f = aarch64_get_FP_float (cpu, sn);
8284 int32_t value = (int32_t) f;
8285
8286 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8287
8288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8289 /* Avoid sign extension to 64 bit. */
8290 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8291 }
8292
8293 /* 64 bit convert float to signed int truncate towards zero. */
8294 static void
8295 fcvtszs (sim_cpu *cpu)
8296 {
8297 unsigned sn = INSTR (9, 5);
8298 unsigned rd = INSTR (4, 0);
8299 float f = aarch64_get_FP_float (cpu, sn);
8300 int64_t value = (int64_t) f;
8301
8302 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8303
8304 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8305 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8306 }
8307
8308 /* 32 bit convert double to signed int truncate towards zero. */
8309 static void
8310 fcvtszd32 (sim_cpu *cpu)
8311 {
8312 unsigned sn = INSTR (9, 5);
8313 unsigned rd = INSTR (4, 0);
8314 /* TODO : check that this rounds toward zero. */
8315 double d = aarch64_get_FP_double (cpu, sn);
8316 int32_t value = (int32_t) d;
8317
8318 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8319
8320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8321 /* Avoid sign extension to 64 bit. */
8322 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8323 }
8324
8325 /* 64 bit convert double to signed int truncate towards zero. */
8326 static void
8327 fcvtszd (sim_cpu *cpu)
8328 {
8329 unsigned sn = INSTR (9, 5);
8330 unsigned rd = INSTR (4, 0);
8331 /* TODO : check that this rounds toward zero. */
8332 double d = aarch64_get_FP_double (cpu, sn);
8333 int64_t value;
8334
8335 value = (int64_t) d;
8336
8337 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8338
8339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8340 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8341 }
8342
8343 static void
8344 do_fcvtzu (sim_cpu *cpu)
8345 {
8346 /* instr[31] = size: 32-bit (0), 64-bit (1)
8347 instr[30,23] = 00111100
8348 instr[22] = type: single (0)/ double (1)
8349 instr[21] = enable (0)/disable(1) precision
8350 instr[20,16] = 11001
8351 instr[15,10] = precision
8352 instr[9,5] = Rs
8353 instr[4,0] = Rd. */
8354
8355 unsigned rs = INSTR (9, 5);
8356 unsigned rd = INSTR (4, 0);
8357
8358 NYI_assert (30, 23, 0x3C);
8359 NYI_assert (20, 16, 0x19);
8360
8361 if (INSTR (21, 21) != 1)
8362 /* Convert to fixed point. */
8363 HALT_NYI;
8364
8365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8366 if (INSTR (31, 31))
8367 {
8368 /* Convert to unsigned 64-bit integer. */
8369 if (INSTR (22, 22))
8370 {
8371 double d = aarch64_get_FP_double (cpu, rs);
8372 uint64_t value = (uint64_t) d;
8373
8374 /* Do not raise an exception if we have reached ULONG_MAX. */
	  if (value != (1ULL << 63))
8376 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8377
8378 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8379 }
8380 else
8381 {
8382 float f = aarch64_get_FP_float (cpu, rs);
8383 uint64_t value = (uint64_t) f;
8384
8385 /* Do not raise an exception if we have reached ULONG_MAX. */
	  if (value != (1ULL << 63))
8387 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8388
8389 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8390 }
8391 }
8392 else
8393 {
8394 uint32_t value;
8395
8396 /* Convert to unsigned 32-bit integer. */
8397 if (INSTR (22, 22))
8398 {
8399 double d = aarch64_get_FP_double (cpu, rs);
8400
8401 value = (uint32_t) d;
8402 /* Do not raise an exception if we have reached UINT_MAX. */
8403 if (value != (1UL << 31))
8404 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8405 }
8406 else
8407 {
8408 float f = aarch64_get_FP_float (cpu, rs);
8409
8410 value = (uint32_t) f;
8411 /* Do not raise an exception if we have reached UINT_MAX. */
8412 if (value != (1UL << 31))
8413 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8414 }
8415
8416 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8417 }
8418 }
8419
8420 static void
8421 do_UCVTF (sim_cpu *cpu)
8422 {
8423 /* instr[31] = size: 32-bit (0), 64-bit (1)
8424 instr[30,23] = 001 1110 0
8425 instr[22] = type: single (0)/ double (1)
8426 instr[21] = 1 ==> integer form, 0 ==> fixed point form (NYI)
8427 instr[20,16] = 0 0011
8428 instr[15,10] = precision
8429 instr[9,5] = Rs
8430 instr[4,0] = Rd. */
8431
8432 unsigned rs = INSTR (9, 5);
8433 unsigned rd = INSTR (4, 0);
8434
8435 NYI_assert (30, 23, 0x3C);
8436 NYI_assert (20, 16, 0x03);
8437
8438 if (INSTR (21, 21) != 1)
8439 HALT_NYI;
8440
8441 /* FIXME: Add exception raising. */
8442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8443 if (INSTR (31, 31))
8444 {
8445 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8446
8447 if (INSTR (22, 22))
8448 aarch64_set_FP_double (cpu, rd, (double) value);
8449 else
8450 aarch64_set_FP_float (cpu, rd, (float) value);
8451 }
8452 else
8453 {
8454 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8455
8456 if (INSTR (22, 22))
8457 aarch64_set_FP_double (cpu, rd, (double) value);
8458 else
8459 aarch64_set_FP_float (cpu, rd, (float) value);
8460 }
8461 }
8462
8463 static void
8464 float_vector_move (sim_cpu *cpu)
8465 {
8466 /* instr[31,17] == 100 1111 0101 0111
8467 instr[16] ==> direction 0=> to GR, 1=> from GR
8468 instr[15,10] => must be 00 0000 (ow UNALLOC)
8469 instr[9,5] ==> source
8470 instr[4,0] ==> dest. */
8471
8472 unsigned rn = INSTR (9, 5);
8473 unsigned rd = INSTR (4, 0);
8474
8475 NYI_assert (31, 17, 0x4F57);
8476
8477 if (INSTR (15, 10) != 0)
8478 HALT_UNALLOC;
8479
8480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8481 if (INSTR (16, 16))
8482 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8483 else
8484 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8485 }
8486
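/* For reference, float_vector_move implements the FMOV forms that
   access the upper half of a 128 bit vector register, i.e.
   FMOV Xd, Vn.D[1] when instr[16] == 0 and FMOV Vd.D[1], Xn when
   instr[16] == 1.  */
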
8487 static void
8488 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8489 {
8490 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8491 instr[30] = 0
8492 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8493 instr[28,25] = 1111
8494 instr[24] = 0
8495 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8496 instr[21] = 1
8497 instr[20,19] = rmode
8498 instr[18,16] = opcode
8499 instr[15,10] = 00 0000 */
8500
8501 uint32_t rmode_opcode;
8502 uint32_t size_type;
8503 uint32_t type;
8504 uint32_t size;
8505 uint32_t S;
8506
8507 if (INSTR (31, 17) == 0x4F57)
8508 {
8509 float_vector_move (cpu);
8510 return;
8511 }
8512
8513 size = INSTR (31, 31);
8514 S = INSTR (29, 29);
8515 if (S != 0)
8516 HALT_UNALLOC;
8517
8518 type = INSTR (23, 22);
8519 if (type > 1)
8520 HALT_UNALLOC;
8521
8522 rmode_opcode = INSTR (20, 16);
8523 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8524
8525 switch (rmode_opcode)
8526 {
8527 case 2: /* SCVTF. */
8528 switch (size_type)
8529 {
8530 case 0: scvtf32 (cpu); return;
8531 case 1: scvtd32 (cpu); return;
8532 case 2: scvtf (cpu); return;
8533 case 3: scvtd (cpu); return;
8534 }
8535
8536 case 6: /* FMOV GR, Vec. */
8537 switch (size_type)
8538 {
8539 case 0: gfmovs (cpu); return;
8540 case 3: gfmovd (cpu); return;
8541 default: HALT_UNALLOC;
8542 }
8543
8544 case 7: /* FMOV vec, GR. */
8545 switch (size_type)
8546 {
8547 case 0: fgmovs (cpu); return;
8548 case 3: fgmovd (cpu); return;
8549 default: HALT_UNALLOC;
8550 }
8551
8552 case 24: /* FCVTZS. */
8553 switch (size_type)
8554 {
8555 case 0: fcvtszs32 (cpu); return;
8556 case 1: fcvtszd32 (cpu); return;
8557 case 2: fcvtszs (cpu); return;
8558 case 3: fcvtszd (cpu); return;
8559 }
8560
8561 case 25: do_fcvtzu (cpu); return;
8562 case 3: do_UCVTF (cpu); return;
8563
8564 case 0: /* FCVTNS. */
8565 case 1: /* FCVTNU. */
8566 case 4: /* FCVTAS. */
8567 case 5: /* FCVTAU. */
8568 case 8: /* FCVTPS. */
8569 case 9: /* FCVTPU. */
8570 case 16: /* FCVTMS. */
8571 case 17: /* FCVTMU. */
8572 default:
8573 HALT_NYI;
8574 }
8575 }
8576
8577 static void
8578 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8579 {
8580 uint32_t flags;
8581
8582 /* FIXME: Add exception raising. */
8583 if (isnan (fvalue1) || isnan (fvalue2))
8584 flags = C|V;
8585 else if (isinf (fvalue1) && isinf (fvalue2))
8586 {
8587 /* Subtracting two infinities may give a NaN. We only need to compare
8588 the signs, which we can get from isinf. */
8589 int result = isinf (fvalue1) - isinf (fvalue2);
8590
8591 if (result == 0)
8592 flags = Z|C;
8593 else if (result < 0)
8594 flags = N;
8595 else /* (result > 0). */
8596 flags = C;
8597 }
8598 else
8599 {
8600 float result = fvalue1 - fvalue2;
8601
8602 if (result == 0.0)
8603 flags = Z|C;
8604 else if (result < 0)
8605 flags = N;
8606 else /* (result > 0). */
8607 flags = C;
8608 }
8609
8610 aarch64_set_CPSR (cpu, flags);
8611 }
8612
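/* For reference, the flag settings above follow the AArch64 FCMP
   definition:

     relation            N Z C V
     value1 == value2    0 1 1 0
     value1 <  value2    1 0 0 0
     value1 >  value2    0 0 1 0
     unordered (a NaN)   0 0 1 1

   so, e.g., comparing 1.0f with a NaN sets just C and V.  */
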
8613 static void
8614 fcmps (sim_cpu *cpu)
8615 {
8616 unsigned sm = INSTR (20, 16);
8617 unsigned sn = INSTR ( 9, 5);
8618
8619 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8620 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8621
8622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8623 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8624 }
8625
8626 /* Float compare to zero -- Invalid Operation exception
8627 only on signaling NaNs. */
8628 static void
8629 fcmpzs (sim_cpu *cpu)
8630 {
8631 unsigned sn = INSTR ( 9, 5);
8632 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8633
8634 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8635 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8636 }
8637
8638 /* Float compare -- Invalid Operation exception on all NaNs. */
8639 static void
8640 fcmpes (sim_cpu *cpu)
8641 {
8642 unsigned sm = INSTR (20, 16);
8643 unsigned sn = INSTR ( 9, 5);
8644
8645 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8646 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8647
8648 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8649 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8650 }
8651
8652 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8653 static void
8654 fcmpzes (sim_cpu *cpu)
8655 {
8656 unsigned sn = INSTR ( 9, 5);
8657 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8658
8659 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8660 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8661 }
8662
8663 static void
8664 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8665 {
8666 uint32_t flags;
8667
8668 /* FIXME: Add exception raising. */
8669 if (isnan (dval1) || isnan (dval2))
8670 flags = C|V;
8671 else if (isinf (dval1) && isinf (dval2))
8672 {
8673 /* Subtracting two infinities may give a NaN. We only need to compare
8674 the signs, which we can get from isinf. */
8675 int result = isinf (dval1) - isinf (dval2);
8676
8677 if (result == 0)
8678 flags = Z|C;
8679 else if (result < 0)
8680 flags = N;
8681 else /* (result > 0). */
8682 flags = C;
8683 }
8684 else
8685 {
8686 double result = dval1 - dval2;
8687
8688 if (result == 0.0)
8689 flags = Z|C;
8690 else if (result < 0)
8691 flags = N;
8692 else /* (result > 0). */
8693 flags = C;
8694 }
8695
8696 aarch64_set_CPSR (cpu, flags);
8697 }
8698
8699 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8700 static void
8701 fcmpd (sim_cpu *cpu)
8702 {
8703 unsigned sm = INSTR (20, 16);
8704 unsigned sn = INSTR ( 9, 5);
8705
8706 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8707 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8708
8709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8710 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8711 }
8712
8713 /* Double compare to zero -- Invalid Operation exception
8714 only on signaling NaNs. */
8715 static void
8716 fcmpzd (sim_cpu *cpu)
8717 {
8718 unsigned sn = INSTR ( 9, 5);
8719 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8720
8721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8722 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8723 }
8724
8725 /* Double compare -- Invalid Operation exception on all NaNs. */
8726 static void
8727 fcmped (sim_cpu *cpu)
8728 {
8729 unsigned sm = INSTR (20, 16);
8730 unsigned sn = INSTR ( 9, 5);
8731
8732 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8733 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8734
8735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8736 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8737 }
8738
8739 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8740 static void
8741 fcmpzed (sim_cpu *cpu)
8742 {
8743 unsigned sn = INSTR ( 9, 5);
8744 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8745
8746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8747 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8748 }
8749
8750 static void
8751 dexSimpleFPCompare (sim_cpu *cpu)
8752 {
8753 /* assert instr[28,25] == 1111
8754 instr[30:24:21:13,10] = 0011000
8755 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8756 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8757 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8758 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8759 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8760 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8761 ow ==> UNALLOC */
8762 uint32_t dispatch;
8763 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8764 uint32_t type = INSTR (23, 22);
8765 uint32_t op = INSTR (15, 14);
8766 uint32_t op2_2_0 = INSTR (2, 0);
8767
8768 if (op2_2_0 != 0)
8769 HALT_UNALLOC;
8770
8771 if (M_S != 0)
8772 HALT_UNALLOC;
8773
8774 if (type > 1)
8775 HALT_UNALLOC;
8776
8777 if (op != 0)
8778 HALT_UNALLOC;
8779
8780 /* dispatch on type and top 2 bits of opcode. */
8781 dispatch = (type << 2) | INSTR (4, 3);
8782
8783 switch (dispatch)
8784 {
8785 case 0: fcmps (cpu); return;
8786 case 1: fcmpzs (cpu); return;
8787 case 2: fcmpes (cpu); return;
8788 case 3: fcmpzes (cpu); return;
8789 case 4: fcmpd (cpu); return;
8790 case 5: fcmpzd (cpu); return;
8791 case 6: fcmped (cpu); return;
8792 case 7: fcmpzed (cpu); return;
8793 }
8794 }
8795
8796 static void
8797 do_scalar_FADDP (sim_cpu *cpu)
8798 {
8799 /* instr [31,23] = 0111 1110 0
8800 instr [22] = single(0)/double(1)
8801 instr [21,10] = 11 0000 1101 10
8802 instr [9,5] = Fn
8803 instr [4,0] = Fd. */
8804
8805 unsigned Fn = INSTR (9, 5);
8806 unsigned Fd = INSTR (4, 0);
8807
8808 NYI_assert (31, 23, 0x0FC);
8809 NYI_assert (21, 10, 0xC36);
8810
8811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8812 if (INSTR (22, 22))
8813 {
8814 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8815 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8816
8817 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8818 }
8819 else
8820 {
8821 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8822 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8823
8824 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8825 }
8826 }
8827
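/* Usage sketch: the scalar FADDP forms handled above are
   FADDP Dd, Vn.2D, which writes Vn.D[0] + Vn.D[1] to Dd, and
   FADDP Sd, Vn.2S, which sums the two low single precision
   elements.  */
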
8828 /* Floating point absolute difference. */
8829
8830 static void
8831 do_scalar_FABD (sim_cpu *cpu)
8832 {
8833 /* instr [31,23] = 0111 1110 1
8834 instr [22] = float(0)/double(1)
8835 instr [21] = 1
8836 instr [20,16] = Rm
8837 instr [15,10] = 1101 01
8838 instr [9, 5] = Rn
8839 instr [4, 0] = Rd. */
8840
8841 unsigned rm = INSTR (20, 16);
8842 unsigned rn = INSTR (9, 5);
8843 unsigned rd = INSTR (4, 0);
8844
8845 NYI_assert (31, 23, 0x0FD);
8846 NYI_assert (21, 21, 1);
8847 NYI_assert (15, 10, 0x35);
8848
8849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8850 if (INSTR (22, 22))
8851 aarch64_set_FP_double (cpu, rd,
8852 fabs (aarch64_get_FP_double (cpu, rn)
8853 - aarch64_get_FP_double (cpu, rm)));
8854 else
8855 aarch64_set_FP_float (cpu, rd,
8856 fabsf (aarch64_get_FP_float (cpu, rn)
8857 - aarch64_get_FP_float (cpu, rm)));
8858 }
8859
8860 static void
8861 do_scalar_CMGT (sim_cpu *cpu)
8862 {
8863 /* instr [31,21] = 0101 1110 111
8864 instr [20,16] = Rm
8865 instr [15,10] = 00 1101
8866 instr [9, 5] = Rn
8867 instr [4, 0] = Rd. */
8868
8869 unsigned rm = INSTR (20, 16);
8870 unsigned rn = INSTR (9, 5);
8871 unsigned rd = INSTR (4, 0);
8872
8873 NYI_assert (31, 21, 0x2F7);
8874 NYI_assert (15, 10, 0x0D);
8875
8876 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8877 aarch64_set_vec_u64 (cpu, rd, 0,
8878 aarch64_get_vec_u64 (cpu, rn, 0) >
8879 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8880 }
8881
8882 static void
8883 do_scalar_USHR (sim_cpu *cpu)
8884 {
8885 /* instr [31,23] = 0111 1111 0
8886 instr [22,16] = shift amount
8887 instr [15,10] = 0000 01
8888 instr [9, 5] = Rn
8889 instr [4, 0] = Rd. */
8890
8891 unsigned amount = 128 - INSTR (22, 16);
8892 unsigned rn = INSTR (9, 5);
8893 unsigned rd = INSTR (4, 0);
8894
8895 NYI_assert (31, 23, 0x0FE);
8896 NYI_assert (15, 10, 0x01);
8897
8898 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8899 aarch64_set_vec_u64 (cpu, rd, 0,
8900 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8901 }
8902
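/* Shift-amount arithmetic for the scalar USHR above, for illustration:
   the field instr[22,16] holds 128 - shift for this 64 bit form, so
   USHR D0, D1, #16 is encoded with instr[22,16] = 112 and the code
   recovers 128 - 112 = 16.  */
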
8903 static void
8904 do_scalar_SSHL (sim_cpu *cpu)
8905 {
8906 /* instr [31,21] = 0101 1110 111
8907 instr [20,16] = Rm
8908 instr [15,10] = 0100 01
8909 instr [9, 5] = Rn
8910 instr [4, 0] = Rd. */
8911
8912 unsigned rm = INSTR (20, 16);
8913 unsigned rn = INSTR (9, 5);
8914 unsigned rd = INSTR (4, 0);
8915 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8916
8917 NYI_assert (31, 21, 0x2F7);
8918 NYI_assert (15, 10, 0x11);
8919
8920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8921 if (shift >= 0)
8922 aarch64_set_vec_s64 (cpu, rd, 0,
8923 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8924 else
8925 aarch64_set_vec_s64 (cpu, rd, 0,
8926 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8927 }
8928
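/* The SSHL shift count is a signed byte taken from the bottom of Rm:
   positive counts shift left, negative counts shift right, so a count
   of -4 computes Rn >> 4.  Note that the arithmetic right shift relies
   on the host's signed >> being arithmetic, which is
   implementation-defined in C but true of the usual build hosts.  */
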
8929 /* Floating point scalar compare greater than or equal to 0. */
8930 static void
8931 do_scalar_FCMGE_zero (sim_cpu *cpu)
8932 {
8933 /* instr [31,23] = 0111 1110 1
8934 instr [22,22] = size
8935 instr [21,16] = 1000 00
8936 instr [15,10] = 1100 10
8937 instr [9, 5] = Rn
8938 instr [4, 0] = Rd. */
8939
8940 unsigned size = INSTR (22, 22);
8941 unsigned rn = INSTR (9, 5);
8942 unsigned rd = INSTR (4, 0);
8943
8944 NYI_assert (31, 23, 0x0FD);
8945 NYI_assert (21, 16, 0x20);
8946 NYI_assert (15, 10, 0x32);
8947
8948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8949 if (size)
8950 aarch64_set_vec_u64 (cpu, rd, 0,
8951 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
8952 else
8953 aarch64_set_vec_u32 (cpu, rd, 0,
8954 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
8955 }
8956
8957 /* Floating point scalar compare less than or equal to 0. */
8958 static void
8959 do_scalar_FCMLE_zero (sim_cpu *cpu)
8960 {
8961 /* instr [31,23] = 0111 1110 1
8962 instr [22,22] = size
8963 instr [21,16] = 1000 00
8964 instr [15,10] = 1101 10
8965 instr [9, 5] = Rn
8966 instr [4, 0] = Rd. */
8967
8968 unsigned size = INSTR (22, 22);
8969 unsigned rn = INSTR (9, 5);
8970 unsigned rd = INSTR (4, 0);
8971
8972 NYI_assert (31, 23, 0x0FD);
8973 NYI_assert (21, 16, 0x20);
8974 NYI_assert (15, 10, 0x36);
8975
8976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8977 if (size)
8978 aarch64_set_vec_u64 (cpu, rd, 0,
8979 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
8980 else
8981 aarch64_set_vec_u32 (cpu, rd, 0,
8982 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
8983 }
8984
8985 /* Floating point scalar compare greater than 0. */
8986 static void
8987 do_scalar_FCMGT_zero (sim_cpu *cpu)
8988 {
8989 /* instr [31,23] = 0101 1110 1
8990 instr [22,22] = size
8991 instr [21,16] = 1000 00
8992 instr [15,10] = 1100 10
8993 instr [9, 5] = Rn
8994 instr [4, 0] = Rd. */
8995
8996 unsigned size = INSTR (22, 22);
8997 unsigned rn = INSTR (9, 5);
8998 unsigned rd = INSTR (4, 0);
8999
9000 NYI_assert (31, 23, 0x0BD);
9001 NYI_assert (21, 16, 0x20);
9002 NYI_assert (15, 10, 0x32);
9003
9004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9005 if (size)
9006 aarch64_set_vec_u64 (cpu, rd, 0,
9007 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9008 else
9009 aarch64_set_vec_u32 (cpu, rd, 0,
9010 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9011 }
9012
9013 /* Floating point scalar compare equal to 0. */
9014 static void
9015 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9016 {
9017 /* instr [31,23] = 0101 1110 1
9018 instr [22,22] = size
9019 instr [21,16] = 1000 00
9020 instr [15,10] = 1101 10
9021 instr [9, 5] = Rn
9022 instr [4, 0] = Rd. */
9023
9024 unsigned size = INSTR (22, 22);
9025 unsigned rn = INSTR (9, 5);
9026 unsigned rd = INSTR (4, 0);
9027
9028 NYI_assert (31, 23, 0x0BD);
9029 NYI_assert (21, 16, 0x20);
9030 NYI_assert (15, 10, 0x36);
9031
9032 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9033 if (size)
9034 aarch64_set_vec_u64 (cpu, rd, 0,
9035 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9036 else
9037 aarch64_set_vec_u32 (cpu, rd, 0,
9038 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9039 }
9040
9041 /* Floating point scalar compare less than 0. */
9042 static void
9043 do_scalar_FCMLT_zero (sim_cpu *cpu)
9044 {
9045 /* instr [31,23] = 0101 1110 1
9046 instr [22,22] = size
9047 instr [21,16] = 1000 00
9048 instr [15,10] = 1110 10
9049 instr [9, 5] = Rn
9050 instr [4, 0] = Rd. */
9051
9052 unsigned size = INSTR (22, 22);
9053 unsigned rn = INSTR (9, 5);
9054 unsigned rd = INSTR (4, 0);
9055
9056 NYI_assert (31, 23, 0x0BD);
9057 NYI_assert (21, 16, 0x20);
9058 NYI_assert (15, 10, 0x3A);
9059
9060 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9061 if (size)
9062 aarch64_set_vec_u64 (cpu, rd, 0,
9063 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9064 else
9065 aarch64_set_vec_u32 (cpu, rd, 0,
9066 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9067 }
9068
9069 static void
9070 do_scalar_shift (sim_cpu *cpu)
9071 {
9072 /* instr [31,23] = 0101 1111 0
9073 instr [22,16] = shift amount
9074 instr [15,10] = 0101 01 [SHL]
9075 instr [15,10] = 0000 01 [SSHR]
9076 instr [9, 5] = Rn
9077 instr [4, 0] = Rd. */
9078
9079 unsigned rn = INSTR (9, 5);
9080 unsigned rd = INSTR (4, 0);
9081 unsigned amount;
9082
9083 NYI_assert (31, 23, 0x0BE);
9084
9085 if (INSTR (22, 22) == 0)
9086 HALT_UNALLOC;
9087
9088 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9089 switch (INSTR (15, 10))
9090 {
9091 case 0x01: /* SSHR */
9092 amount = 128 - INSTR (22, 16);
9093 aarch64_set_vec_s64 (cpu, rd, 0,
9094 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9095 return;
9096 case 0x15: /* SHL */
9097 amount = INSTR (22, 16) - 64;
9098 aarch64_set_vec_u64 (cpu, rd, 0,
9099 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9100 return;
9101 default:
9102 HALT_NYI;
9103 }
9104 }
9105
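/* Immediate decoding examples for the two cases above:
   SSHR D0, D1, #8 encodes instr[22,16] = 120, giving 128 - 120 = 8,
   while SHL D0, D1, #8 encodes instr[22,16] = 72, giving
   72 - 64 = 8.  */
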
9106 /* FCMEQ FCMGT FCMGE. */
9107 static void
9108 do_scalar_FCM (sim_cpu *cpu)
9109 {
9110 /* instr [31,30] = 01
9111 instr [29] = U
9112 instr [28,24] = 1 1110
9113 instr [23] = E
9114 instr [22] = size
9115 instr [21] = 1
9116 instr [20,16] = Rm
9117 instr [15,12] = 1110
9118 instr [11] = AC
9119 instr [10] = 1
9120 instr [9, 5] = Rn
9121 instr [4, 0] = Rd. */
9122
9123 unsigned rm = INSTR (20, 16);
9124 unsigned rn = INSTR (9, 5);
9125 unsigned rd = INSTR (4, 0);
9126 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9127 unsigned result;
9128 float val1;
9129 float val2;
9130
9131 NYI_assert (31, 30, 1);
9132 NYI_assert (28, 24, 0x1E);
9133 NYI_assert (21, 21, 1);
9134 NYI_assert (15, 12, 0xE);
9135 NYI_assert (10, 10, 1);
9136
9137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9138 if (INSTR (22, 22))
9139 {
9140 double val1 = aarch64_get_FP_double (cpu, rn);
9141 double val2 = aarch64_get_FP_double (cpu, rm);
9142
9143 switch (EUac)
9144 {
9145 case 0: /* 000 */
9146 result = val1 == val2;
9147 break;
9148
9149 case 3: /* 011 */
9150 val1 = fabs (val1);
9151 val2 = fabs (val2);
9152 /* Fall through. */
9153 case 2: /* 010 */
9154 result = val1 >= val2;
9155 break;
9156
9157 case 7: /* 111 */
9158 val1 = fabs (val1);
9159 val2 = fabs (val2);
9160 /* Fall through. */
9161 case 6: /* 110 */
9162 result = val1 > val2;
9163 break;
9164
9165 default:
9166 HALT_UNALLOC;
9167 }
9168
9169 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9170 return;
9171 }
9172
9173 val1 = aarch64_get_FP_float (cpu, rn);
9174 val2 = aarch64_get_FP_float (cpu, rm);
9175
9176 switch (EUac)
9177 {
9178 case 0: /* 000 */
9179 result = val1 == val2;
9180 break;
9181
9182 case 3: /* 011 */
9183 val1 = fabsf (val1);
9184 val2 = fabsf (val2);
9185 /* Fall through. */
9186 case 2: /* 010 */
9187 result = val1 >= val2;
9188 break;
9189
9190 case 7: /* 111 */
9191 val1 = fabsf (val1);
9192 val2 = fabsf (val2);
9193 /* Fall through. */
9194 case 6: /* 110 */
9195 result = val1 > val2;
9196 break;
9197
9198 default:
9199 HALT_UNALLOC;
9200 }
9201
9202 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9203 }
9204
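/* For reference, the E:U:ac selector above maps onto the scalar
   compare mnemonics as follows:

     EUac  mnemonic
     000   FCMEQ  (==)
     010   FCMGE  (>=)
     011   FACGE  (absolute >=)
     110   FCMGT  (>)
     111   FACGT  (absolute >)  */
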
9205 /* An alias of DUP. */
9206 static void
9207 do_scalar_MOV (sim_cpu *cpu)
9208 {
9209 /* instr [31,21] = 0101 1110 000
9210 instr [20,16] = imm5
9211 instr [15,10] = 0000 01
9212 instr [9, 5] = Rn
9213 instr [4, 0] = Rd. */
9214
9215 unsigned rn = INSTR (9, 5);
9216 unsigned rd = INSTR (4, 0);
9217 unsigned index;
9218
9219 NYI_assert (31, 21, 0x2F0);
9220 NYI_assert (15, 10, 0x01);
9221
9222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9223 if (INSTR (16, 16))
9224 {
9225 /* 8-bit. */
9226 index = INSTR (20, 17);
9227 aarch64_set_vec_u8
9228 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9229 }
9230 else if (INSTR (17, 17))
9231 {
9232 /* 16-bit. */
9233 index = INSTR (20, 18);
9234 aarch64_set_vec_u16
9235 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9236 }
9237 else if (INSTR (18, 18))
9238 {
9239 /* 32-bit. */
9240 index = INSTR (20, 19);
9241 aarch64_set_vec_u32
9242 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9243 }
9244 else if (INSTR (19, 19))
9245 {
9246 /* 64-bit. */
9247 index = INSTR (20, 20);
9248 aarch64_set_vec_u64
9249 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9250 }
9251 else
9252 HALT_UNALLOC;
9253 }
9254
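/* The element size is given by the position of the lowest set bit of
   imm5 (instr[20,16]) and the index by the bits above it; e.g.
   imm5 = 00110 selects the 16 bit element with index 1, so it copies
   half-word 1 of Vn into the destination (the scalar DUP, whose
   preferred alias is MOV).  */
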
9255 static void
9256 do_scalar_NEG (sim_cpu *cpu)
9257 {
9258 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9259 instr [9, 5] = Rn
9260 instr [4, 0] = Rd. */
9261
9262 unsigned rn = INSTR (9, 5);
9263 unsigned rd = INSTR (4, 0);
9264
9265 NYI_assert (31, 10, 0x1FB82E);
9266
9267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9268 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9269 }
9270
9271 static void
9272 do_scalar_USHL (sim_cpu *cpu)
9273 {
9274 /* instr [31,21] = 0111 1110 111
9275 instr [20,16] = Rm
9276 instr [15,10] = 0100 01
9277 instr [9, 5] = Rn
9278 instr [4, 0] = Rd. */
9279
9280 unsigned rm = INSTR (20, 16);
9281 unsigned rn = INSTR (9, 5);
9282 unsigned rd = INSTR (4, 0);
9283 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9284
9285 NYI_assert (31, 21, 0x3F7);
9286 NYI_assert (15, 10, 0x11);
9287
9288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9289 if (shift >= 0)
9290 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9291 else
9292 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9293 }
9294
9295 static void
9296 do_double_add (sim_cpu *cpu)
9297 {
9298 /* instr [31,21] = 0101 1110 111
9299 instr [20,16] = Fn
9300 instr [15,10] = 1000 01
9301 instr [9,5] = Fm
9302 instr [4,0] = Fd. */
9303 unsigned Fd;
9304 unsigned Fm;
9305 unsigned Fn;
9306 double val1;
9307 double val2;
9308
9309 NYI_assert (31, 21, 0x2F7);
9310 NYI_assert (15, 10, 0x21);
9311
9312 Fd = INSTR (4, 0);
9313 Fm = INSTR (9, 5);
9314 Fn = INSTR (20, 16);
9315
9316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9317 val1 = aarch64_get_FP_double (cpu, Fm);
9318 val2 = aarch64_get_FP_double (cpu, Fn);
9319
9320 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9321 }
9322
9323 static void
9324 do_scalar_UCVTF (sim_cpu *cpu)
9325 {
9326 /* instr [31,23] = 0111 1110 0
9327 instr [22] = single(0)/double(1)
9328 instr [21,10] = 10 0001 1101 10
9329 instr [9,5] = rn
9330 instr [4,0] = rd. */
9331
9332 unsigned rn = INSTR (9, 5);
9333 unsigned rd = INSTR (4, 0);
9334
9335 NYI_assert (31, 23, 0x0FC);
9336 NYI_assert (21, 10, 0x876);
9337
9338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9339 if (INSTR (22, 22))
9340 {
9341 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9342
9343 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9344 }
9345 else
9346 {
9347 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9348
9349 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9350 }
9351 }
9352
9353 static void
9354 do_scalar_vec (sim_cpu *cpu)
9355 {
9356 /* instr [30] = 1. */
9357 /* instr [28,25] = 1111. */
9358 switch (INSTR (31, 23))
9359 {
9360 case 0xBC:
9361 switch (INSTR (15, 10))
9362 {
9363 case 0x01: do_scalar_MOV (cpu); return;
9364 case 0x39: do_scalar_FCM (cpu); return;
9365 case 0x3B: do_scalar_FCM (cpu); return;
9366 }
9367 break;
9368
9369 case 0xBE: do_scalar_shift (cpu); return;
9370
9371 case 0xFC:
9372 switch (INSTR (15, 10))
9373 {
9374 case 0x36:
9375 switch (INSTR (21, 16))
9376 {
9377 case 0x30: do_scalar_FADDP (cpu); return;
9378 case 0x21: do_scalar_UCVTF (cpu); return;
9379 }
9380 HALT_NYI;
9381 case 0x39: do_scalar_FCM (cpu); return;
9382 case 0x3B: do_scalar_FCM (cpu); return;
9383 }
9384 break;
9385
9386 case 0xFD:
9387 switch (INSTR (15, 10))
9388 {
9389 case 0x0D: do_scalar_CMGT (cpu); return;
9390 case 0x11: do_scalar_USHL (cpu); return;
9391 case 0x2E: do_scalar_NEG (cpu); return;
9392 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9393 case 0x35: do_scalar_FABD (cpu); return;
9394 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9395 case 0x39: do_scalar_FCM (cpu); return;
9396 case 0x3B: do_scalar_FCM (cpu); return;
9397 default:
9398 HALT_NYI;
9399 }
9400
9401 case 0xFE: do_scalar_USHR (cpu); return;
9402
9403 case 0xBD:
9404 switch (INSTR (15, 10))
9405 {
9406 case 0x21: do_double_add (cpu); return;
9407 case 0x11: do_scalar_SSHL (cpu); return;
9408 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9409 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9410 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9411 default:
9412 HALT_NYI;
9413 }
9414
9415 default:
9416 HALT_NYI;
9417 }
9418 }
9419
9420 static void
9421 dexAdvSIMD1 (sim_cpu *cpu)
9422 {
9423 /* instr [28,25] = 1 111. */
9424
9425 /* We are currently only interested in the basic
9426 scalar fp routines which all have bit 30 = 0. */
9427 if (INSTR (30, 30))
9428 do_scalar_vec (cpu);
9429
9430 /* instr[24] is set for FP data processing 3-source and clear for
9431 all other basic scalar fp instruction groups. */
9432 else if (INSTR (24, 24))
9433 dexSimpleFPDataProc3Source (cpu);
9434
9435 /* instr[21] is clear for floating <-> fixed conversions and set for
9436 all other basic scalar fp instruction groups. */
9437 else if (!INSTR (21, 21))
9438 dexSimpleFPFixedConvert (cpu);
9439
9440 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9441 11 ==> cond select, 00 ==> other. */
9442 else
9443 switch (INSTR (11, 10))
9444 {
9445 case 1: dexSimpleFPCondCompare (cpu); return;
9446 case 2: dexSimpleFPDataProc2Source (cpu); return;
9447 case 3: dexSimpleFPCondSelect (cpu); return;
9448
9449 default:
9450 /* Now an ordered cascade of tests.
9451 FP immediate has instr [12] == 1.
9452 FP compare has instr [13] == 1.
9453 FP Data Proc 1 Source has instr [14] == 1.
9454 FP floating <--> integer conversions has instr [15] == 0. */
9455 if (INSTR (12, 12))
9456 dexSimpleFPImmediate (cpu);
9457
9458 else if (INSTR (13, 13))
9459 dexSimpleFPCompare (cpu);
9460
9461 else if (INSTR (14, 14))
9462 dexSimpleFPDataProc1Source (cpu);
9463
9464 else if (!INSTR (15, 15))
9465 dexSimpleFPIntegerConvert (cpu);
9466
9467 else
9468 /* If we get here then instr[15] == 1 which means UNALLOC. */
9469 HALT_UNALLOC;
9470 }
9471 }
9472
9473 /* PC relative addressing. */
9474
9475 static void
9476 pcadr (sim_cpu *cpu)
9477 {
9478 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9479 instr[30,29] = immlo
9480 instr[23,5] = immhi. */
9481 uint64_t address;
9482 unsigned rd = INSTR (4, 0);
9483 uint32_t isPage = INSTR (31, 31);
9484 union { int64_t s64; uint64_t u64; } imm;
9485 uint64_t offset;
9486
9487 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9488 offset = imm.u64;
9489 offset = (offset << 2) | INSTR (30, 29);
9490
9491 address = aarch64_get_PC (cpu);
9492
9493 if (isPage)
9494 {
9495 offset <<= 12;
9496 address &= ~0xfff;
9497 }
9498
9499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9500 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9501 }
9502
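/* Worked example: with PC = 0x400123 and a combined immediate of 1,
   ADR computes 0x400123 + 1 = 0x400124, whereas ADRP masks the PC
   down to 0x400000 and scales the immediate to 0x1000, yielding
   0x401000.  */
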
9503 /* Specific decode and execute for group Data Processing Immediate. */
9504
9505 static void
9506 dexPCRelAddressing (sim_cpu *cpu)
9507 {
9508 /* assert instr[28,24] = 10000. */
9509 pcadr (cpu);
9510 }
9511
9512 /* Immediate logical.
9513 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9514 16, 32 or 64 bit sequence pulled out at decode and possibly
9515 inverting it.
9516
9517 N.B. the output register (dest) can normally be Xn or SP;
9518 the exception occurs for flag setting instructions, which may
9519 only use Xn for the output (dest). The input register can
9520 never be SP. */
9521
9522 /* 32 bit and immediate. */
9523 static void
9524 and32 (sim_cpu *cpu, uint32_t bimm)
9525 {
9526 unsigned rn = INSTR (9, 5);
9527 unsigned rd = INSTR (4, 0);
9528
9529 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9530 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9531 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9532 }
9533
9534 /* 64 bit and immediate. */
9535 static void
9536 and64 (sim_cpu *cpu, uint64_t bimm)
9537 {
9538 unsigned rn = INSTR (9, 5);
9539 unsigned rd = INSTR (4, 0);
9540
9541 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9542 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9543 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9544 }
9545
9546 /* 32 bit and immediate set flags. */
9547 static void
9548 ands32 (sim_cpu *cpu, uint32_t bimm)
9549 {
9550 unsigned rn = INSTR (9, 5);
9551 unsigned rd = INSTR (4, 0);
9552
9553 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9554 uint32_t value2 = bimm;
9555
9556 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9557 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9558 set_flags_for_binop32 (cpu, value1 & value2);
9559 }
9560
9561 /* 64 bit and immediate set flags. */
9562 static void
9563 ands64 (sim_cpu *cpu, uint64_t bimm)
9564 {
9565 unsigned rn = INSTR (9, 5);
9566 unsigned rd = INSTR (4, 0);
9567
9568 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9569 uint64_t value2 = bimm;
9570
9571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9572 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9573 set_flags_for_binop64 (cpu, value1 & value2);
9574 }
9575
9576 /* 32 bit exclusive or immediate. */
9577 static void
9578 eor32 (sim_cpu *cpu, uint32_t bimm)
9579 {
9580 unsigned rn = INSTR (9, 5);
9581 unsigned rd = INSTR (4, 0);
9582
9583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9584 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9585 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9586 }
9587
9588 /* 64 bit exclusive or immediate. */
9589 static void
9590 eor64 (sim_cpu *cpu, uint64_t bimm)
9591 {
9592 unsigned rn = INSTR (9, 5);
9593 unsigned rd = INSTR (4, 0);
9594
9595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9596 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9597 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9598 }
9599
9600 /* 32 bit or immediate. */
9601 static void
9602 orr32 (sim_cpu *cpu, uint32_t bimm)
9603 {
9604 unsigned rn = INSTR (9, 5);
9605 unsigned rd = INSTR (4, 0);
9606
9607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9608 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9609 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9610 }
9611
9612 /* 64 bit or immediate. */
9613 static void
9614 orr64 (sim_cpu *cpu, uint64_t bimm)
9615 {
9616 unsigned rn = INSTR (9, 5);
9617 unsigned rd = INSTR (4, 0);
9618
9619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9620 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9621 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9622 }
9623
9624 /* Logical shifted register.
9625 These allow an optional LSL, ASR, LSR or ROR to the second source
9626 register with a count up to the register bit count.
9627 N.B register args may not be SP. */
9628
9629 /* 32 bit AND shifted register. */
9630 static void
9631 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9632 {
9633 unsigned rm = INSTR (20, 16);
9634 unsigned rn = INSTR (9, 5);
9635 unsigned rd = INSTR (4, 0);
9636
9637 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9638 aarch64_set_reg_u64
9639 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9640 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9641 }
9642
9643 /* 64 bit AND shifted register. */
9644 static void
9645 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9646 {
9647 unsigned rm = INSTR (20, 16);
9648 unsigned rn = INSTR (9, 5);
9649 unsigned rd = INSTR (4, 0);
9650
9651 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9652 aarch64_set_reg_u64
9653 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9654 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9655 }
9656
9657 /* 32 bit AND shifted register setting flags. */
9658 static void
9659 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9660 {
9661 unsigned rm = INSTR (20, 16);
9662 unsigned rn = INSTR (9, 5);
9663 unsigned rd = INSTR (4, 0);
9664
9665 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9666 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9667 shift, count);
9668
9669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9670 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9671 set_flags_for_binop32 (cpu, value1 & value2);
9672 }
9673
9674 /* 64 bit AND shifted register setting flags. */
9675 static void
9676 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9677 {
9678 unsigned rm = INSTR (20, 16);
9679 unsigned rn = INSTR (9, 5);
9680 unsigned rd = INSTR (4, 0);
9681
9682 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9683 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9684 shift, count);
9685
9686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9687 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9688 set_flags_for_binop64 (cpu, value1 & value2);
9689 }
9690
9691 /* 32 bit BIC shifted register. */
9692 static void
9693 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9694 {
9695 unsigned rm = INSTR (20, 16);
9696 unsigned rn = INSTR (9, 5);
9697 unsigned rd = INSTR (4, 0);
9698
9699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9700 aarch64_set_reg_u64
9701 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9702 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9703 }
9704
9705 /* 64 bit BIC shifted register. */
9706 static void
9707 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9708 {
9709 unsigned rm = INSTR (20, 16);
9710 unsigned rn = INSTR (9, 5);
9711 unsigned rd = INSTR (4, 0);
9712
9713 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9714 aarch64_set_reg_u64
9715 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9716 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9717 }
9718
9719 /* 32 bit BIC shifted register setting flags. */
9720 static void
9721 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9722 {
9723 unsigned rm = INSTR (20, 16);
9724 unsigned rn = INSTR (9, 5);
9725 unsigned rd = INSTR (4, 0);
9726
9727 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9728 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9729 shift, count);
9730
9731 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9732 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9733 set_flags_for_binop32 (cpu, value1 & value2);
9734 }
9735
9736 /* 64 bit BIC shifted register setting flags. */
9737 static void
9738 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9739 {
9740 unsigned rm = INSTR (20, 16);
9741 unsigned rn = INSTR (9, 5);
9742 unsigned rd = INSTR (4, 0);
9743
9744 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9745 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9746 shift, count);
9747
9748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9749 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9750 set_flags_for_binop64 (cpu, value1 & value2);
9751 }
9752
9753 /* 32 bit EON shifted register. */
9754 static void
9755 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9756 {
9757 unsigned rm = INSTR (20, 16);
9758 unsigned rn = INSTR (9, 5);
9759 unsigned rd = INSTR (4, 0);
9760
9761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9762 aarch64_set_reg_u64
9763 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9764 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9765 }
9766
9767 /* 64 bit EON shifted register. */
9768 static void
9769 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9770 {
9771 unsigned rm = INSTR (20, 16);
9772 unsigned rn = INSTR (9, 5);
9773 unsigned rd = INSTR (4, 0);
9774
9775 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9776 aarch64_set_reg_u64
9777 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9778 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9779 }
9780
9781 /* 32 bit EOR shifted register. */
9782 static void
9783 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9784 {
9785 unsigned rm = INSTR (20, 16);
9786 unsigned rn = INSTR (9, 5);
9787 unsigned rd = INSTR (4, 0);
9788
9789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9790 aarch64_set_reg_u64
9791 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9792 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9793 }
9794
9795 /* 64 bit EOR shifted register. */
9796 static void
9797 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9798 {
9799 unsigned rm = INSTR (20, 16);
9800 unsigned rn = INSTR (9, 5);
9801 unsigned rd = INSTR (4, 0);
9802
9803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9804 aarch64_set_reg_u64
9805 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9806 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9807 }
9808
9809 /* 32 bit ORR shifted register. */
9810 static void
9811 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9812 {
9813 unsigned rm = INSTR (20, 16);
9814 unsigned rn = INSTR (9, 5);
9815 unsigned rd = INSTR (4, 0);
9816
9817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9818 aarch64_set_reg_u64
9819 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9820 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9821 }
9822
9823 /* 64 bit ORR shifted register. */
9824 static void
9825 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9826 {
9827 unsigned rm = INSTR (20, 16);
9828 unsigned rn = INSTR (9, 5);
9829 unsigned rd = INSTR (4, 0);
9830
9831 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9832 aarch64_set_reg_u64
9833 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9834 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9835 }
9836
9837 /* 32 bit ORN shifted register. */
9838 static void
9839 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9840 {
9841 unsigned rm = INSTR (20, 16);
9842 unsigned rn = INSTR (9, 5);
9843 unsigned rd = INSTR (4, 0);
9844
9845 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9846 aarch64_set_reg_u64
9847 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9848 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9849 }
9850
9851 /* 64 bit ORN shifted register. */
9852 static void
9853 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9854 {
9855 unsigned rm = INSTR (20, 16);
9856 unsigned rn = INSTR (9, 5);
9857 unsigned rd = INSTR (4, 0);
9858
9859 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9860 aarch64_set_reg_u64
9861 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9862 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9863 }
9864
9865 static void
9866 dexLogicalImmediate (sim_cpu *cpu)
9867 {
9868 /* assert instr[28,23] = 100100
9869 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9870 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9871 instr[22] = N : used to construct immediate mask
9872 instr[21,16] = immr
9873 instr[15,10] = imms
9874 instr[9,5] = Rn
9875 instr[4,0] = Rd */
9876
9877 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9878 uint32_t size = INSTR (31, 31);
9879 uint32_t N = INSTR (22, 22);
9880 /* uint32_t immr = INSTR (21, 16); */
9881 /* uint32_t imms = INSTR (15, 10); */
9882 uint32_t index = INSTR (22, 10);
9883 uint64_t bimm64 = LITable [index];
9884 uint32_t dispatch = INSTR (30, 29);
9885
9886 if (~size & N)
9887 HALT_UNALLOC;
9888
9889 if (!bimm64)
9890 HALT_UNALLOC;
9891
9892 if (size == 0)
9893 {
9894 uint32_t bimm = (uint32_t) bimm64;
9895
9896 switch (dispatch)
9897 {
9898 case 0: and32 (cpu, bimm); return;
9899 case 1: orr32 (cpu, bimm); return;
9900 case 2: eor32 (cpu, bimm); return;
9901 case 3: ands32 (cpu, bimm); return;
9902 }
9903 }
9904 else
9905 {
9906 switch (dispatch)
9907 {
9908 case 0: and64 (cpu, bimm64); return;
9909 case 1: orr64 (cpu, bimm64); return;
9910 case 2: eor64 (cpu, bimm64); return;
9911 case 3: ands64 (cpu, bimm64); return;
9912 }
9913 }
9914 HALT_UNALLOC;
9915 }
9916
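/* Decoding sketch for the LITable lookup above: the 13 bit index is
   N:immr:imms.  For example N = 0, immr = 000000, imms = 111100
   describes a 2 bit element containing a single 1 with no rotation,
   which replicates to bimm64 = 0x5555555555555555; with size == 0 the
   32 bit operation uses the low half, 0x55555555.  */
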
9917 /* Immediate move.
9918 The uimm argument is a 16 bit value to be inserted into the
9919 target register; the pos argument locates the 16 bit word in the
9920 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9921 3} for 64 bit.
9922 N.B. the register arg may not be SP, so it should be
9923 accessed using the setGZRegisterXXX accessors. */
9924
9925 /* 32 bit move 16 bit immediate zero remaining shorts. */
9926 static void
9927 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9928 {
9929 unsigned rd = INSTR (4, 0);
9930
9931 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9932 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9933 }
9934
9935 /* 64 bit move 16 bit immediate zero remaining shorts. */
9936 static void
9937 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9938 {
9939 unsigned rd = INSTR (4, 0);
9940
9941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9942 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9943 }
9944
9945 /* 32 bit move 16 bit immediate negated. */
9946 static void
9947 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9948 {
9949 unsigned rd = INSTR (4, 0);
9950
9951 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9952 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9953 }
9954
9955 /* 64 bit move 16 bit immediate negated. */
9956 static void
9957 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9958 {
9959 unsigned rd = INSTR (4, 0);
9960
9961 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9962 aarch64_set_reg_u64
9963 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9964 ^ 0xffffffffffffffffULL));
9965 }
9966
9967 /* 32 bit move 16 bit immediate keep remaining shorts. */
9968 static void
9969 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9970 {
9971 unsigned rd = INSTR (4, 0);
9972 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9973 uint32_t value = val << (pos * 16);
9974 uint32_t mask = ~(0xffffU << (pos * 16));
9975
9976 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9977 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9978 }
9979
9980 /* 64 bit move 16 bit immediate keep remaining shorts. */
9981 static void
9982 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9983 {
9984 unsigned rd = INSTR (4, 0);
9985 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9986 uint64_t value = (uint64_t) val << (pos * 16);
9987 uint64_t mask = ~(0xffffULL << (pos * 16));
9988
9989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9990 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9991 }
9992
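/* Taken together the movz/movn/movk helpers implement the usual
   constant-building idiom, e.g.

     movz x0, #0x1234, lsl #16    ; x0 = 0x0000000012340000
     movk x0, #0x5678             ; x0 = 0x0000000012345678

   where the movk keeps the short already set by the movz.  */
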
9993 static void
9994 dexMoveWideImmediate (sim_cpu *cpu)
9995 {
9996 /* assert instr[28:23] = 100101
9997 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9998 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9999 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10000 instr[20,5] = uimm16
10001 instr[4,0] = Rd */
10002
10003 /* N.B. the (multiple of 16) shift is applied by the called routine,
10004 we just pass the multiplier. */
10005
10006 uint32_t imm;
10007 uint32_t size = INSTR (31, 31);
10008 uint32_t op = INSTR (30, 29);
10009 uint32_t shift = INSTR (22, 21);
10010
10011 /* A 32 bit operation can only shift by 0 or 1 lot of 16;
10012 anything else is an unallocated instruction. */
10013 if (size == 0 && (shift > 1))
10014 HALT_UNALLOC;
10015
10016 if (op == 1)
10017 HALT_UNALLOC;
10018
10019 imm = INSTR (20, 5);
10020
10021 if (size == 0)
10022 {
10023 if (op == 0)
10024 movn32 (cpu, imm, shift);
10025 else if (op == 2)
10026 movz32 (cpu, imm, shift);
10027 else
10028 movk32 (cpu, imm, shift);
10029 }
10030 else
10031 {
10032 if (op == 0)
10033 movn64 (cpu, imm, shift);
10034 else if (op == 2)
10035 movz64 (cpu, imm, shift);
10036 else
10037 movk64 (cpu, imm, shift);
10038 }
10039 }
10040
10041 /* Bitfield operations.
10042 These take a pair of bit positions r and s which are in {0..31}
10043 or {0..63} depending on the instruction word size.
10044 N.B register args may not be SP. */
10045
10046 /* We start with ubfm, which just needs to pick
10047 some bits out of the source, zero the rest and write
10048 the result to dest. This needs just two logical shifts. */
10049
10050 /* 32 bit bitfield move, left and right of affected zeroed
10051 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10052 static void
10053 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10054 {
10055 unsigned rd;
10056 unsigned rn = INSTR (9, 5);
10057 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10058
10059 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10060 if (r <= s)
10061 {
10062 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10063 We want only bits s:xxx:r at the bottom of the word
10064 so we LSL bit s up to bit 31 i.e. by 31 - s
10065 and then we LSR to bring bit 31 down to bit s - r
10066 i.e. by 31 + r - s. */
10067 value <<= 31 - s;
10068 value >>= 31 + r - s;
10069 }
10070 else
10071 {
10072 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10073 We want only bits s:xxx:0 starting at bit 31-(r-1)
10074 so we LSL bit s up to bit 31 i.e. by 31 - s
10075 and then we LSL to bring bit 31 down to 31-(r-1)+s
10076 i.e. by r - (s + 1). */
10077 value <<= 31 - s;
10078 value >>= r - (s + 1);
10079 }
10080
10081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10082 rd = INSTR (4, 0);
10083 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10084 }
10085
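/* Worked example for the two cases above: UBFX W0, W1, #8, #8 is
   UBFM with r = 8, s = 15, so value <<= 16 brings bit 15 up to bit 31
   and value >>= 24 brings it back down to bit 7, leaving W1<15:8> in
   W0<7:0>.  The r > s case covers LSL: a shift of #4 is encoded as
   r = 28, s = 27, giving <<= 4 and >>= 0.  */
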
10086 /* 64 bit bitfield move, left and right of affected zeroed
10087 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10088 static void
10089 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10090 {
10091 unsigned rd;
10092 unsigned rn = INSTR (9, 5);
10093 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10094
10095 if (r <= s)
10096 {
10097 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10098 We want only bits s:xxx:r at the bottom of the word.
10099 So we LSL bit s up to bit 63 i.e. by 63 - s
10100 and then we LSR to bring bit 63 down to bit s - r
10101 i.e. by 63 + r - s. */
10102 value <<= 63 - s;
10103 value >>= 63 + r - s;
10104 }
10105 else
10106 {
10107 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10108 We want only bits s:xxx:0 starting at bit 63-(r-1).
10109 So we LSL bit s up to bit 63 i.e. by 63 - s
10110 and then we LSL to bring bit 63 down to 63-(r-1)+s
10111 i.e. by r - (s + 1). */
10112 value <<= 63 - s;
10113 value >>= r - (s + 1);
10114 }
10115
10116 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10117 rd = INSTR (4, 0);
10118 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10119 }
10120
10121 /* The signed versions need to insert sign bits
10122 on the left of the inserted bit field. so we do
10123 much the same as the unsigned version except we
10124 use an arithmetic shift right -- this just means
10125 we need to operate on signed values. */
10126
10127 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10128 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10129 static void
10130 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10131 {
10132 unsigned rd;
10133 unsigned rn = INSTR (9, 5);
10134 /* as per ubfm32 but use an ASR instead of an LSR. */
10135 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10136
10137 if (r <= s)
10138 {
10139 value <<= 31 - s;
10140 value >>= 31 + r - s;
10141 }
10142 else
10143 {
10144 value <<= 31 - s;
10145 value >>= r - (s + 1);
10146 }
10147
10148 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10149 rd = INSTR (4, 0);
10150 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10151 }
10152
10153 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10154 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10155 static void
10156 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10157 {
10158 unsigned rd;
10159 unsigned rn = INSTR (9, 5);
10160 /* as per ubfm but use an ASR instead of an LSR. */
10161 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10162
10163 if (r <= s)
10164 {
10165 value <<= 63 - s;
10166 value >>= 63 + r - s;
10167 }
10168 else
10169 {
10170 value <<= 63 - s;
10171 value >>= r - (s + 1);
10172 }
10173
10174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10175 rd = INSTR (4, 0);
10176 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10177 }
10178
10179 /* Finally, these versions leave non-affected bits
10180 as is. so we need to generate the bits as per
10181 ubfm and also generate a mask to pick the
10182 bits from the original and computed values. */
10183
10184 /* 32 bit bitfield move, non-affected bits left as is.
10185 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10186 static void
10187 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10188 {
10189 unsigned rn = INSTR (9, 5);
10190 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10191 uint32_t mask = -1;
10192 unsigned rd;
10193 uint32_t value2;
10194
10195 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10196 if (r <= s)
10197 {
10198 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10199 We want only bits s:xxx:r at the bottom of the word
10200 so we LSL bit s up to bit 31 i.e. by 31 - s
10201 and then we LSR to bring bit 31 down to bit s - r
10202 i.e. by 31 + r - s. */
10203 value <<= 31 - s;
10204 value >>= 31 + r - s;
10205 /* the mask must include the same bits. */
10206 mask <<= 31 - s;
10207 mask >>= 31 + r - s;
10208 }
10209 else
10210 {
10211 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10212 We want only bits s:xxx:0 starting at bit 31-(r-1)
10213 so we LSL bit s up to bit 31 i.e. by 31 - s
10214 and then we LSL to bring bit 31 down to 31-(r-1)+s
10215 i.e. by r - (s + 1). */
10216 value <<= 31 - s;
10217 value >>= r - (s + 1);
10218 /* The mask must include the same bits. */
10219 mask <<= 31 - s;
10220 mask >>= r - (s + 1);
10221 }
10222
10223 rd = INSTR (4, 0);
10224 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10225
10226 value2 &= ~mask;
10227 value2 |= value;
10228
10229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10230 /* Merge the inserted field with the untouched destination bits. */
10231 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10232 }
10233
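/* e.g. BFI W0, W1, #8, #4 is BFM with r = 24, s = 3: value and mask
   are shifted <<= 28 then >>= 20, placing W1<3:0> and a four bit mask
   at bits <11:8>; the surrounding destination bits pass through
   unchanged.  */
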
10234 /* 64 bit bitfield move, non-affected bits left as is.
10235 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10236 static void
10237 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10238 {
10239 unsigned rd;
10240 unsigned rn = INSTR (9, 5);
10241 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10242 uint64_t mask = 0xffffffffffffffffULL;
10243
10244 if (r <= s)
10245 {
10246 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10247 We want only bits s:xxx:r at the bottom of the word
10248 so we LSL bit s up to bit 63 i.e. by 63 - s
10249 and then we LSR to bring bit 63 down to bit s - r
10250 i.e. by 63 + r - s. */
10251 value <<= 63 - s;
10252 value >>= 63 + r - s;
10253 /* The mask must include the same bits. */
10254 mask <<= 63 - s;
10255 mask >>= 63 + r - s;
10256 }
10257 else
10258 {
10259 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10260 We want only bits s:xxx:0 starting at bit 63-(r-1)
10261 so we LSL bit s up to bit 63 i.e. by 63 - s
10262 and then we LSL to bring bit 63 down to 63-(r-1)+s
10263 i.e. by r - (s + 1). */
10264 value <<= 63 - s;
10265 value >>= r - (s + 1);
10266 /* The mask must include the same bits. */
10267 mask <<= 63 - s;
10268 mask >>= r - (s + 1);
10269 }
10270
10271 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10272 rd = INSTR (4, 0);
10273 aarch64_set_reg_u64
10274 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10275 }
10276
10277 static void
10278 dexBitfieldImmediate (sim_cpu *cpu)
10279 {
10280 /* assert instr[28:23] = 100110
10281 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10282 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10283 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10284 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10285 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10286 instr[9,5] = Rn
10287 instr[4,0] = Rd */
10288
10289 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10290 uint32_t dispatch;
10291 uint32_t imms;
10292 uint32_t size = INSTR (31, 31);
10293 uint32_t N = INSTR (22, 22);
10294 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10295 /* or else we have an UNALLOC. */
10296 uint32_t immr = INSTR (21, 16);
10297
10298 if (~size & N)
10299 HALT_UNALLOC;
10300
10301 if (!size && uimm (immr, 5, 5))
10302 HALT_UNALLOC;
10303
10304 imms = INSTR (15, 10);
10305 if (!size && uimm (imms, 5, 5))
10306 HALT_UNALLOC;
10307
10308 /* Switch on combined size and op. */
10309 dispatch = INSTR (31, 29);
10310 switch (dispatch)
10311 {
10312 case 0: sbfm32 (cpu, immr, imms); return;
10313 case 1: bfm32 (cpu, immr, imms); return;
10314 case 2: ubfm32 (cpu, immr, imms); return;
10315 case 4: sbfm (cpu, immr, imms); return;
10316 case 5: bfm (cpu, immr, imms); return;
10317 case 6: ubfm (cpu, immr, imms); return;
10318 default: HALT_UNALLOC;
10319 }
10320 }
10321
10322 static void
10323 do_EXTR_32 (sim_cpu *cpu)
10324 {
10325 /* instr[31:21] = 00010011100
10326 instr[20,16] = Rm
10327 instr[15,10] = imms : 0xxxxx for 32 bit
10328 instr[9,5] = Rn
10329 instr[4,0] = Rd */
10330 unsigned rm = INSTR (20, 16);
10331 unsigned imms = INSTR (15, 10) & 31;
10332 unsigned rn = INSTR ( 9, 5);
10333 unsigned rd = INSTR ( 4, 0);
10334 uint64_t val1;
10335 uint64_t val2;
10336
10337 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10338 val1 >>= imms;
10339 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10340 /* With imms == 0 the result is just Rm; mask to 32 bits. */
10341 val2 = imms ? (val2 << (32 - imms)) : 0;
10342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10343 aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL);
10344 }
10345
10346 static void
10347 do_EXTR_64 (sim_cpu *cpu)
10348 {
10349 /* instr[31:21] = 10010011100
10350 instr[20,16] = Rm
10351 instr[15,10] = imms
10352 instr[9,5] = Rn
10353 instr[4,0] = Rd */
10354 unsigned rm = INSTR (20, 16);
10355 unsigned imms = INSTR (15, 10) & 63;
10356 unsigned rn = INSTR ( 9, 5);
10357 unsigned rd = INSTR ( 4, 0);
10358 uint64_t val;
10359
10360 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10361 /* Guard against the undefined 64 bit shift when imms == 0. */
10362 if (imms > 0)
10363 val = (val >> imms) | (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10364 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10365 }
10366
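/* EXTR doubles as the rotate idiom: ROR X0, X1, #8 assembles to
   EXTR X0, X1, X1, #8, which the code above computes as
   (X1 >> 8) | (X1 << 56).  */
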
10367 static void
10368 dexExtractImmediate (sim_cpu *cpu)
10369 {
10370 /* assert instr[28:23] = 100111
10371 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10372 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10373 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10374 instr[21] = op0 : must be 0 or UNALLOC
10375 instr[20,16] = Rm
10376 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10377 instr[9,5] = Rn
10378 instr[4,0] = Rd */
10379
10380 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10381 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10382 uint32_t dispatch;
10383 uint32_t size = INSTR (31, 31);
10384 uint32_t N = INSTR (22, 22);
10385 /* 32 bit operations must have imms[5] = 0
10386 or else we have an UNALLOC. */
10387 uint32_t imms = INSTR (15, 10);
10388
10389 if (size ^ N)
10390 HALT_UNALLOC;
10391
10392 if (!size && uimm (imms, 5, 5))
10393 HALT_UNALLOC;
10394
10395 /* Switch on combined size and op. */
10396 dispatch = INSTR (31, 29);
10397
10398 if (dispatch == 0)
10399 do_EXTR_32 (cpu);
10400
10401 else if (dispatch == 4)
10402 do_EXTR_64 (cpu);
10403
10404 else if (dispatch == 1)
10405 HALT_NYI;
10406 else
10407 HALT_UNALLOC;
10408 }
10409
10410 static void
10411 dexDPImm (sim_cpu *cpu)
10412 {
10413 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10414      assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10415 bits [25,23] of a DPImm are the secondary dispatch vector. */
10416 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10417
10418 switch (group2)
10419 {
10420 case DPIMM_PCADR_000:
10421 case DPIMM_PCADR_001:
10422 dexPCRelAddressing (cpu);
10423 return;
10424
10425 case DPIMM_ADDSUB_010:
10426 case DPIMM_ADDSUB_011:
10427 dexAddSubtractImmediate (cpu);
10428 return;
10429
10430 case DPIMM_LOG_100:
10431 dexLogicalImmediate (cpu);
10432 return;
10433
10434 case DPIMM_MOV_101:
10435 dexMoveWideImmediate (cpu);
10436 return;
10437
10438 case DPIMM_BITF_110:
10439 dexBitfieldImmediate (cpu);
10440 return;
10441
10442 case DPIMM_EXTR_111:
10443 dexExtractImmediate (cpu);
10444 return;
10445
10446 default:
10447 /* Should never reach here. */
10448 HALT_NYI;
10449 }
10450 }
10451
10452 static void
10453 dexLoadUnscaledImmediate (sim_cpu *cpu)
10454 {
10455 /* instr[29,24] == 111_00
10456 instr[21] == 0
10457 instr[11,10] == 00
10458 instr[31,30] = size
10459 instr[26] = V
10460 instr[23,22] = opc
10461 instr[20,12] = simm9
10462 instr[9,5] = rn may be SP. */
10463 /* unsigned rt = INSTR (4, 0); */
10464 uint32_t V = INSTR (26, 26);
10465 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10466 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10467
10468 if (!V)
10469 {
10470 /* GReg operations. */
10471 switch (dispatch)
10472 {
10473 case 0: sturb (cpu, imm); return;
10474 case 1: ldurb32 (cpu, imm); return;
10475 case 2: ldursb64 (cpu, imm); return;
10476 case 3: ldursb32 (cpu, imm); return;
10477 case 4: sturh (cpu, imm); return;
10478 case 5: ldurh32 (cpu, imm); return;
10479 case 6: ldursh64 (cpu, imm); return;
10480 case 7: ldursh32 (cpu, imm); return;
10481 case 8: stur32 (cpu, imm); return;
10482 case 9: ldur32 (cpu, imm); return;
10483 case 10: ldursw (cpu, imm); return;
10484 case 12: stur64 (cpu, imm); return;
10485 case 13: ldur64 (cpu, imm); return;
10486
10487 case 14:
10488 /* PRFUM NYI. */
10489 HALT_NYI;
10490
10491 default:
10492 case 11:
10493 case 15:
10494 HALT_UNALLOC;
10495 }
10496 }
10497
10498 /* FReg operations. */
10499 switch (dispatch)
10500 {
10501 case 2: fsturq (cpu, imm); return;
10502 case 3: fldurq (cpu, imm); return;
10503 case 8: fsturs (cpu, imm); return;
10504 case 9: fldurs (cpu, imm); return;
10505 case 12: fsturd (cpu, imm); return;
10506 case 13: fldurd (cpu, imm); return;
10507
10508 case 0: /* STUR 8 bit FP. */
10509 case 1: /* LDUR 8 bit FP. */
10510 case 4: /* STUR 16 bit FP. */
10511     case 5: /* LDUR 16 bit FP. */
10512 HALT_NYI;
10513
10514 default:
10515 case 6:
10516 case 7:
10517 case 10:
10518 case 11:
10519 case 14:
10520 case 15:
10521 HALT_UNALLOC;
10522 }
10523 }
10524
10525 /* N.B. A preliminary note regarding all the ldrs<x>32
10526 instructions
10527
10528    The signed value loaded by these instructions is cast to unsigned
10529    before being passed to aarch64_set_reg_u64 (cpu, N, ...), i.e. to the
10530    64 bit element of the GReg union.  This performs a 32 bit sign extension
10531    (as required) but avoids 64 bit sign extension, thus ensuring that the
10532    top half of the register word is zero.  This is what the spec demands
10533    when a 32 bit load occurs.  */
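
/* A stand-alone illustration of the cast pattern described above
   (sketch only, not part of the simulator): the conversion through a
   32-bit unsigned type sign-extends to 32 bits and stops there, so
   widening to 64 bits zero-fills the top half.  */

static inline uint64_t
widen_s8_for_w_reg_sketch (int8_t loaded)
{
  uint32_t w = (uint32_t) loaded;   /* -1 becomes 0xffffffff.  */
  return (uint64_t) w;              /* 0x00000000ffffffff.  */
}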
10534
10535 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10536 static void
10537 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10538 {
10539 unsigned int rn = INSTR (9, 5);
10540 unsigned int rt = INSTR (4, 0);
10541
10542   /* The target register may not be SP but the source may be.
10543      There is no scaling required for a byte load. */
10544 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10545 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10546                        (uint32_t) aarch64_get_mem_s8 (cpu, address));
10547 }
10548
10549 /* 32 bit load sign-extended byte scaled or unscaled zero-
10550 or sign-extended 32-bit register offset. */
10551 static void
10552 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10553 {
10554 unsigned int rm = INSTR (20, 16);
10555 unsigned int rn = INSTR (9, 5);
10556 unsigned int rt = INSTR (4, 0);
10557
10558 /* rn may reference SP, rm and rt must reference ZR. */
10559
10560 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10561 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10562 extension);
10563
10564 /* There is no scaling required for a byte load. */
10565 aarch64_set_reg_u64
10566     (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10567 + displacement));
10568 }
10569
10570 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10571 pre- or post-writeback. */
10572 static void
10573 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10574 {
10575 uint64_t address;
10576 unsigned int rn = INSTR (9, 5);
10577 unsigned int rt = INSTR (4, 0);
10578
10579 if (rn == rt && wb != NoWriteBack)
10580 HALT_UNALLOC;
10581
10582 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10583
10584 if (wb == Pre)
10585 address += offset;
10586
10587 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10588                        (uint32_t) aarch64_get_mem_s8 (cpu, address));
10589
10590 if (wb == Post)
10591 address += offset;
10592
10593 if (wb != NoWriteBack)
10594 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10595 }
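
/* The pre/post writeback pattern above recurs throughout this file; a
   sketch of it in isolation (illustrative only, not used by the
   simulator): Pre applies the offset before the access, Post after it,
   and in both cases the updated address is written back to the base.  */

static inline uint64_t
writeback_address_sketch (uint64_t base, int32_t offset, WriteBack wb,
                          uint64_t *new_base)
{
  /* Only Pre and Post reach the *_wb helpers (selected by instr[11]).  */
  uint64_t access = (wb == Pre) ? base + offset : base;

  *new_base = base + offset;    /* Both modes update the base.  */
  return access;                /* The address the access uses.  */
}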
10596
10597 /* 8 bit store scaled. */
10598 static void
10599 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10600 {
10601 unsigned st = INSTR (4, 0);
10602 unsigned rn = INSTR (9, 5);
10603
10604 aarch64_set_mem_u8 (cpu,
10605 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10606 aarch64_get_vec_u8 (cpu, st, 0));
10607 }
10608
10609 /* 8 bit store scaled or unscaled zero- or
10610 sign-extended 8-bit register offset. */
10611 static void
10612 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10613 {
10614 unsigned rm = INSTR (20, 16);
10615 unsigned rn = INSTR (9, 5);
10616 unsigned st = INSTR (4, 0);
10617
10618 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10619 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10620 extension);
10621 uint64_t displacement = scaling == Scaled ? extended : 0;
10622
10623 aarch64_set_mem_u8
10624 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10625 }
10626
10627 /* 16 bit store scaled. */
10628 static void
10629 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10630 {
10631 unsigned st = INSTR (4, 0);
10632 unsigned rn = INSTR (9, 5);
10633
10634 aarch64_set_mem_u16
10635 (cpu,
10636 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10637 aarch64_get_vec_u16 (cpu, st, 0));
10638 }
10639
10640 /* 16 bit store scaled or unscaled zero-
10641 or sign-extended 16-bit register offset. */
10642 static void
10643 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10644 {
10645 unsigned rm = INSTR (20, 16);
10646 unsigned rn = INSTR (9, 5);
10647 unsigned st = INSTR (4, 0);
10648
10649 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10650 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10651 extension);
10652 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10653
10654 aarch64_set_mem_u16
10655 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10656 }
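
/* A sketch of what the OPT_SCALE macro used above is assumed to do
   (illustrative only, not part of the simulator): scale the extended
   register offset by the transfer size, but only when the S bit
   requested a scaled offset.  */

static inline uint64_t
opt_scale_sketch (int64_t extended, unsigned bits, Scaling scaling)
{
  unsigned shift = 0;

  while ((1u << shift) * 8 < bits)      /* log2 (bits / 8).  */
    shift++;

  return scaling == Scaled ? (uint64_t) extended << shift
                           : (uint64_t) extended;
}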
10657
10658 /* 32 bit store scaled unsigned 12 bit. */
10659 static void
10660 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10661 {
10662 unsigned st = INSTR (4, 0);
10663 unsigned rn = INSTR (9, 5);
10664
10665 aarch64_set_mem_u32
10666 (cpu,
10667 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10668 aarch64_get_vec_u32 (cpu, st, 0));
10669 }
10670
10671 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10672 static void
10673 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10674 {
10675 unsigned rn = INSTR (9, 5);
10676 unsigned st = INSTR (4, 0);
10677
10678 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10679
10680 if (wb != Post)
10681 address += offset;
10682
10683 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10684
10685 if (wb == Post)
10686 address += offset;
10687
10688 if (wb != NoWriteBack)
10689 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10690 }
10691
10692 /* 32 bit store scaled or unscaled zero-
10693 or sign-extended 32-bit register offset. */
10694 static void
10695 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10696 {
10697 unsigned rm = INSTR (20, 16);
10698 unsigned rn = INSTR (9, 5);
10699 unsigned st = INSTR (4, 0);
10700
10701 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10702 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10703 extension);
10704 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10705
10706 aarch64_set_mem_u32
10707 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10708 }
10709
10710 /* 64 bit store scaled unsigned 12 bit. */
10711 static void
10712 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10713 {
10714 unsigned st = INSTR (4, 0);
10715 unsigned rn = INSTR (9, 5);
10716
10717 aarch64_set_mem_u64
10718 (cpu,
10719 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10720 aarch64_get_vec_u64 (cpu, st, 0));
10721 }
10722
10723 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10724 static void
10725 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10726 {
10727 unsigned rn = INSTR (9, 5);
10728 unsigned st = INSTR (4, 0);
10729
10730 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10731
10732 if (wb != Post)
10733 address += offset;
10734
10735 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10736
10737 if (wb == Post)
10738 address += offset;
10739
10740 if (wb != NoWriteBack)
10741 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10742 }
10743
10744 /* 64 bit store scaled or unscaled zero-
10745 or sign-extended 32-bit register offset. */
10746 static void
10747 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10748 {
10749 unsigned rm = INSTR (20, 16);
10750 unsigned rn = INSTR (9, 5);
10751 unsigned st = INSTR (4, 0);
10752
10753 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10754 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10755 extension);
10756 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10757
10758 aarch64_set_mem_u64
10759 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10760 }
10761
10762 /* 128 bit store scaled unsigned 12 bit. */
10763 static void
10764 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10765 {
10766 FRegister a;
10767 unsigned st = INSTR (4, 0);
10768 unsigned rn = INSTR (9, 5);
10769 uint64_t addr;
10770
10771 aarch64_get_FP_long_double (cpu, st, & a);
10772
10773 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10774 aarch64_set_mem_long_double (cpu, addr, a);
10775 }
10776
10777 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10778 static void
10779 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10780 {
10781 FRegister a;
10782 unsigned rn = INSTR (9, 5);
10783 unsigned st = INSTR (4, 0);
10784 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10785
10786 if (wb != Post)
10787 address += offset;
10788
10789 aarch64_get_FP_long_double (cpu, st, & a);
10790 aarch64_set_mem_long_double (cpu, address, a);
10791
10792 if (wb == Post)
10793 address += offset;
10794
10795 if (wb != NoWriteBack)
10796 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10797 }
10798
10799 /* 128 bit store scaled or unscaled zero-
10800 or sign-extended 32-bit register offset. */
10801 static void
10802 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10803 {
10804 unsigned rm = INSTR (20, 16);
10805 unsigned rn = INSTR (9, 5);
10806 unsigned st = INSTR (4, 0);
10807
10808 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10809 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10810 extension);
10811 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10812
10813 FRegister a;
10814
10815 aarch64_get_FP_long_double (cpu, st, & a);
10816 aarch64_set_mem_long_double (cpu, address + displacement, a);
10817 }
10818
10819 static void
10820 dexLoadImmediatePrePost (sim_cpu *cpu)
10821 {
10822 /* instr[31,30] = size
10823 instr[29,27] = 111
10824 instr[26] = V
10825 instr[25,24] = 00
10826 instr[23,22] = opc
10827 instr[21] = 0
10828 instr[20,12] = simm9
10829 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10830 instr[10] = 0
10831 instr[9,5] = Rn may be SP.
10832 instr[4,0] = Rt */
10833
10834 uint32_t V = INSTR (26, 26);
10835 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10836 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10837 WriteBack wb = INSTR (11, 11);
10838
10839 if (!V)
10840 {
10841 /* GReg operations. */
10842 switch (dispatch)
10843 {
10844 case 0: strb_wb (cpu, imm, wb); return;
10845 case 1: ldrb32_wb (cpu, imm, wb); return;
10846 case 2: ldrsb_wb (cpu, imm, wb); return;
10847 case 3: ldrsb32_wb (cpu, imm, wb); return;
10848 case 4: strh_wb (cpu, imm, wb); return;
10849 case 5: ldrh32_wb (cpu, imm, wb); return;
10850 case 6: ldrsh64_wb (cpu, imm, wb); return;
10851 case 7: ldrsh32_wb (cpu, imm, wb); return;
10852 case 8: str32_wb (cpu, imm, wb); return;
10853 case 9: ldr32_wb (cpu, imm, wb); return;
10854 case 10: ldrsw_wb (cpu, imm, wb); return;
10855 case 12: str_wb (cpu, imm, wb); return;
10856 case 13: ldr_wb (cpu, imm, wb); return;
10857
10858 default:
10859 case 11:
10860 case 14:
10861 case 15:
10862 HALT_UNALLOC;
10863 }
10864 }
10865
10866 /* FReg operations. */
10867 switch (dispatch)
10868 {
10869 case 2: fstrq_wb (cpu, imm, wb); return;
10870 case 3: fldrq_wb (cpu, imm, wb); return;
10871 case 8: fstrs_wb (cpu, imm, wb); return;
10872 case 9: fldrs_wb (cpu, imm, wb); return;
10873 case 12: fstrd_wb (cpu, imm, wb); return;
10874 case 13: fldrd_wb (cpu, imm, wb); return;
10875
10876     case 0: /* STR 8 bit FP. */
10877     case 1: /* LDR 8 bit FP. */
10878     case 4: /* STR 16 bit FP. */
10879     case 5: /* LDR 16 bit FP. */
10880 HALT_NYI;
10881
10882 default:
10883 case 6:
10884 case 7:
10885 case 10:
10886 case 11:
10887 case 14:
10888 case 15:
10889 HALT_UNALLOC;
10890 }
10891 }
10892
10893 static void
10894 dexLoadRegisterOffset (sim_cpu *cpu)
10895 {
10896 /* instr[31,30] = size
10897 instr[29,27] = 111
10898 instr[26] = V
10899 instr[25,24] = 00
10900 instr[23,22] = opc
10901 instr[21] = 1
10902 instr[20,16] = rm
10903 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10904 110 ==> SXTW, 111 ==> SXTX,
10905 ow ==> RESERVED
10906 instr[12] = scaled
10907 instr[11,10] = 10
10908 instr[9,5] = rn
10909 instr[4,0] = rt. */
10910
10911 uint32_t V = INSTR (26, 26);
10912 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10913 Scaling scale = INSTR (12, 12);
10914 Extension extensionType = INSTR (15, 13);
10915
10916 /* Check for illegal extension types. */
10917 if (uimm (extensionType, 1, 1) == 0)
10918 HALT_UNALLOC;
10919
10920 if (extensionType == UXTX || extensionType == SXTX)
10921 extensionType = NoExtension;
10922
10923 if (!V)
10924 {
10925 /* GReg operations. */
10926 switch (dispatch)
10927 {
10928 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10929 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10930 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10931 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10932 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10933 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10934 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10935 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10936 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10937 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10938 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10939 case 12: str_scale_ext (cpu, scale, extensionType); return;
10940 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10941 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10942
10943 default:
10944 case 11:
10945 case 15:
10946 HALT_UNALLOC;
10947 }
10948 }
10949
10950 /* FReg operations. */
10951 switch (dispatch)
10952 {
10953     case 1: /* LDR 8 bit FP. */
10954 HALT_NYI;
10955 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10956     case 5: /* LDR 16 bit FP. */
10957 HALT_NYI;
10958 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10959 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10960
10961 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10962 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10963 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10964 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10965 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10966
10967 default:
10968 case 6:
10969 case 7:
10970 case 10:
10971 case 11:
10972 case 14:
10973 case 15:
10974 HALT_UNALLOC;
10975 }
10976 }
10977
10978 static void
10979 dexLoadUnsignedImmediate (sim_cpu *cpu)
10980 {
10981 /* instr[29,24] == 111_01
10982 instr[31,30] = size
10983 instr[26] = V
10984 instr[23,22] = opc
10985 instr[21,10] = uimm12 : unsigned immediate offset
10986 instr[9,5] = rn may be SP.
10987 instr[4,0] = rt. */
10988
10989 uint32_t V = INSTR (26,26);
10990 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10991 uint32_t imm = INSTR (21, 10);
10992
10993 if (!V)
10994 {
10995 /* GReg operations. */
10996 switch (dispatch)
10997 {
10998 case 0: strb_abs (cpu, imm); return;
10999 case 1: ldrb32_abs (cpu, imm); return;
11000 case 2: ldrsb_abs (cpu, imm); return;
11001 case 3: ldrsb32_abs (cpu, imm); return;
11002 case 4: strh_abs (cpu, imm); return;
11003 case 5: ldrh32_abs (cpu, imm); return;
11004 case 6: ldrsh_abs (cpu, imm); return;
11005 case 7: ldrsh32_abs (cpu, imm); return;
11006 case 8: str32_abs (cpu, imm); return;
11007 case 9: ldr32_abs (cpu, imm); return;
11008 case 10: ldrsw_abs (cpu, imm); return;
11009 case 12: str_abs (cpu, imm); return;
11010 case 13: ldr_abs (cpu, imm); return;
11011 case 14: prfm_abs (cpu, imm); return;
11012
11013 default:
11014 case 11:
11015 case 15:
11016 HALT_UNALLOC;
11017 }
11018 }
11019
11020 /* FReg operations. */
11021 switch (dispatch)
11022 {
11023 case 0: fstrb_abs (cpu, imm); return;
11024 case 4: fstrh_abs (cpu, imm); return;
11025 case 8: fstrs_abs (cpu, imm); return;
11026 case 12: fstrd_abs (cpu, imm); return;
11027 case 2: fstrq_abs (cpu, imm); return;
11028
11029 case 1: fldrb_abs (cpu, imm); return;
11030 case 5: fldrh_abs (cpu, imm); return;
11031 case 9: fldrs_abs (cpu, imm); return;
11032 case 13: fldrd_abs (cpu, imm); return;
11033 case 3: fldrq_abs (cpu, imm); return;
11034
11035 default:
11036 case 6:
11037 case 7:
11038 case 10:
11039 case 11:
11040 case 14:
11041 case 15:
11042 HALT_UNALLOC;
11043 }
11044 }
11045
11046 static void
11047 dexLoadExclusive (sim_cpu *cpu)
11048 {
11049 /* assert instr[29:24] = 001000;
11050 instr[31,30] = size
11051 instr[23] = 0 if exclusive
11052 instr[22] = L : 1 if load, 0 if store
11053 instr[21] = 1 if pair
11054 instr[20,16] = Rs
11055 instr[15] = o0 : 1 if ordered
11056 instr[14,10] = Rt2
11057 instr[9,5] = Rn
11058      instr[4,0] = Rt. */
11059
11060 switch (INSTR (22, 21))
11061 {
11062 case 2: ldxr (cpu); return;
11063 case 0: stxr (cpu); return;
11064 default: HALT_NYI;
11065 }
11066 }
11067
11068 static void
11069 dexLoadOther (sim_cpu *cpu)
11070 {
11071 uint32_t dispatch;
11072
11073 /* instr[29,25] = 111_0
11074 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11075      instr[21] and instr[11,10] form the secondary dispatch vector. */
11076 if (INSTR (24, 24))
11077 {
11078 dexLoadUnsignedImmediate (cpu);
11079 return;
11080 }
11081
11082 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11083 switch (dispatch)
11084 {
11085 case 0: dexLoadUnscaledImmediate (cpu); return;
11086 case 1: dexLoadImmediatePrePost (cpu); return;
11087 case 3: dexLoadImmediatePrePost (cpu); return;
11088 case 6: dexLoadRegisterOffset (cpu); return;
11089
11090 default:
11091 case 2:
11092 case 4:
11093 case 5:
11094 case 7:
11095 HALT_NYI;
11096 }
11097 }
11098
11099 static void
11100 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11101 {
11102 unsigned rn = INSTR (14, 10);
11103 unsigned rd = INSTR (9, 5);
11104 unsigned rm = INSTR (4, 0);
11105 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11106
11107 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11108     HALT_UNALLOC; /* Base overlapping a data register with writeback is UNPREDICTABLE.  */
11109
11110 offset <<= 2;
11111
11112 if (wb != Post)
11113 address += offset;
11114
11115 aarch64_set_mem_u32 (cpu, address,
11116 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11117 aarch64_set_mem_u32 (cpu, address + 4,
11118 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11119
11120 if (wb == Post)
11121 address += offset;
11122
11123 if (wb != NoWriteBack)
11124 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11125 }
11126
11127 static void
11128 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11129 {
11130 unsigned rn = INSTR (14, 10);
11131 unsigned rd = INSTR (9, 5);
11132 unsigned rm = INSTR (4, 0);
11133 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11134
11135 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11136     HALT_UNALLOC; /* Base overlapping a data register with writeback is UNPREDICTABLE.  */
11137
11138 offset <<= 3;
11139
11140 if (wb != Post)
11141 address += offset;
11142
11143 aarch64_set_mem_u64 (cpu, address,
11144 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11145 aarch64_set_mem_u64 (cpu, address + 8,
11146 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11147
11148 if (wb == Post)
11149 address += offset;
11150
11151 if (wb != NoWriteBack)
11152 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11153 }
11154
11155 static void
11156 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11157 {
11158 unsigned rn = INSTR (14, 10);
11159 unsigned rd = INSTR (9, 5);
11160 unsigned rm = INSTR (4, 0);
11161 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11162
11163 /* Treat this as unalloc to make sure we don't do it. */
11164 if (rn == rm)
11165 HALT_UNALLOC;
11166
11167 offset <<= 2;
11168
11169 if (wb != Post)
11170 address += offset;
11171
11172 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11173 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11174
11175 if (wb == Post)
11176 address += offset;
11177
11178 if (wb != NoWriteBack)
11179 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11180 }
11181
11182 static void
11183 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11184 {
11185 unsigned rn = INSTR (14, 10);
11186 unsigned rd = INSTR (9, 5);
11187 unsigned rm = INSTR (4, 0);
11188 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11189
11190 /* Treat this as unalloc to make sure we don't do it. */
11191 if (rn == rm)
11192 HALT_UNALLOC;
11193
11194 offset <<= 2;
11195
11196 if (wb != Post)
11197 address += offset;
11198
11199 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11200 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11201
11202 if (wb == Post)
11203 address += offset;
11204
11205 if (wb != NoWriteBack)
11206 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11207 }
11208
11209 static void
11210 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11211 {
11212 unsigned rn = INSTR (14, 10);
11213 unsigned rd = INSTR (9, 5);
11214 unsigned rm = INSTR (4, 0);
11215 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11216
11217 /* Treat this as unalloc to make sure we don't do it. */
11218 if (rn == rm)
11219 HALT_UNALLOC;
11220
11221 offset <<= 3;
11222
11223 if (wb != Post)
11224 address += offset;
11225
11226 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11227 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11228
11229 if (wb == Post)
11230 address += offset;
11231
11232 if (wb != NoWriteBack)
11233 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11234 }
11235
11236 static void
11237 dex_load_store_pair_gr (sim_cpu *cpu)
11238 {
11239 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11240      instr[29,25] = instruction encoding: 101_0 (_ is the V bit)
11241      instr[26] = V : 1 if fp, 0 if gp
11242 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11243 instr[22] = load/store (1=> load)
11244 instr[21,15] = signed, scaled, offset
11245 instr[14,10] = Rn
11246 instr[ 9, 5] = Rd
11247 instr[ 4, 0] = Rm. */
11248
11249 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11250 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11251
11252 switch (dispatch)
11253 {
11254 case 2: store_pair_u32 (cpu, offset, Post); return;
11255 case 3: load_pair_u32 (cpu, offset, Post); return;
11256 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11257 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11258 case 6: store_pair_u32 (cpu, offset, Pre); return;
11259 case 7: load_pair_u32 (cpu, offset, Pre); return;
11260
11261 case 11: load_pair_s32 (cpu, offset, Post); return;
11262 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11263 case 15: load_pair_s32 (cpu, offset, Pre); return;
11264
11265 case 18: store_pair_u64 (cpu, offset, Post); return;
11266 case 19: load_pair_u64 (cpu, offset, Post); return;
11267 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11268 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11269 case 22: store_pair_u64 (cpu, offset, Pre); return;
11270 case 23: load_pair_u64 (cpu, offset, Pre); return;
11271
11272 default:
11273 HALT_UNALLOC;
11274 }
11275 }
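
/* Worked example of the dispatch above (for reference):
   STP X0, X1, [SP, #-16]! has size = 10, addressing mode = 11 (pre)
   and L = 0, so dispatch = (2 << 3) | 0b110 = 22, selecting
   store_pair_u64 with Pre writeback; imm7 = -2 is scaled by 8 to the
   byte offset -16.  */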
11276
11277 static void
11278 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11279 {
11280 unsigned rn = INSTR (14, 10);
11281 unsigned rd = INSTR (9, 5);
11282 unsigned rm = INSTR (4, 0);
11283 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11284
11285 offset <<= 2;
11286
11287 if (wb != Post)
11288 address += offset;
11289
11290 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11291 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11292
11293 if (wb == Post)
11294 address += offset;
11295
11296 if (wb != NoWriteBack)
11297 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11298 }
11299
11300 static void
11301 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11302 {
11303 unsigned rn = INSTR (14, 10);
11304 unsigned rd = INSTR (9, 5);
11305 unsigned rm = INSTR (4, 0);
11306 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11307
11308 offset <<= 3;
11309
11310 if (wb != Post)
11311 address += offset;
11312
11313 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11314 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11315
11316 if (wb == Post)
11317 address += offset;
11318
11319 if (wb != NoWriteBack)
11320 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11321 }
11322
11323 static void
11324 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11325 {
11326 FRegister a;
11327 unsigned rn = INSTR (14, 10);
11328 unsigned rd = INSTR (9, 5);
11329 unsigned rm = INSTR (4, 0);
11330 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11331
11332 offset <<= 4;
11333
11334 if (wb != Post)
11335 address += offset;
11336
11337 aarch64_get_FP_long_double (cpu, rm, & a);
11338 aarch64_set_mem_long_double (cpu, address, a);
11339 aarch64_get_FP_long_double (cpu, rn, & a);
11340 aarch64_set_mem_long_double (cpu, address + 16, a);
11341
11342 if (wb == Post)
11343 address += offset;
11344
11345 if (wb != NoWriteBack)
11346 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11347 }
11348
11349 static void
11350 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11351 {
11352 unsigned rn = INSTR (14, 10);
11353 unsigned rd = INSTR (9, 5);
11354 unsigned rm = INSTR (4, 0);
11355 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11356
11357 if (rm == rn)
11358 HALT_UNALLOC;
11359
11360 offset <<= 2;
11361
11362 if (wb != Post)
11363 address += offset;
11364
11365 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11366 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11367
11368 if (wb == Post)
11369 address += offset;
11370
11371 if (wb != NoWriteBack)
11372 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11373 }
11374
11375 static void
11376 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11377 {
11378 unsigned rn = INSTR (14, 10);
11379 unsigned rd = INSTR (9, 5);
11380 unsigned rm = INSTR (4, 0);
11381 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11382
11383 if (rm == rn)
11384 HALT_UNALLOC;
11385
11386 offset <<= 3;
11387
11388 if (wb != Post)
11389 address += offset;
11390
11391 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11392 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11393
11394 if (wb == Post)
11395 address += offset;
11396
11397 if (wb != NoWriteBack)
11398 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11399 }
11400
11401 static void
11402 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11403 {
11404 FRegister a;
11405 unsigned rn = INSTR (14, 10);
11406 unsigned rd = INSTR (9, 5);
11407 unsigned rm = INSTR (4, 0);
11408 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11409
11410 if (rm == rn)
11411 HALT_UNALLOC;
11412
11413 offset <<= 4;
11414
11415 if (wb != Post)
11416 address += offset;
11417
11418 aarch64_get_mem_long_double (cpu, address, & a);
11419 aarch64_set_FP_long_double (cpu, rm, a);
11420 aarch64_get_mem_long_double (cpu, address + 16, & a);
11421 aarch64_set_FP_long_double (cpu, rn, a);
11422
11423 if (wb == Post)
11424 address += offset;
11425
11426 if (wb != NoWriteBack)
11427 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11428 }
11429
11430 static void
11431 dex_load_store_pair_fp (sim_cpu *cpu)
11432 {
11433 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11434 instr[29,25] = instruction encoding
11435 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11436 instr[22] = load/store (1=> load)
11437 instr[21,15] = signed, scaled, offset
11438 instr[14,10] = Rn
11439 instr[ 9, 5] = Rd
11440 instr[ 4, 0] = Rm */
11441
11442 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11443 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11444
11445 switch (dispatch)
11446 {
11447 case 2: store_pair_float (cpu, offset, Post); return;
11448 case 3: load_pair_float (cpu, offset, Post); return;
11449 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11450 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11451 case 6: store_pair_float (cpu, offset, Pre); return;
11452 case 7: load_pair_float (cpu, offset, Pre); return;
11453
11454 case 10: store_pair_double (cpu, offset, Post); return;
11455 case 11: load_pair_double (cpu, offset, Post); return;
11456 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11457 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11458 case 14: store_pair_double (cpu, offset, Pre); return;
11459 case 15: load_pair_double (cpu, offset, Pre); return;
11460
11461 case 18: store_pair_long_double (cpu, offset, Post); return;
11462 case 19: load_pair_long_double (cpu, offset, Post); return;
11463 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11464 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11465 case 22: store_pair_long_double (cpu, offset, Pre); return;
11466 case 23: load_pair_long_double (cpu, offset, Pre); return;
11467
11468 default:
11469 HALT_UNALLOC;
11470 }
11471 }
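
/* Worked example of the dispatch above (for reference):
   LDP D0, D1, [X0] has size = 01, addressing mode = 10 (offset) and
   L = 1, so dispatch = (1 << 3) | 0b101 = 13, selecting
   load_pair_double with NoWriteBack.  */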
11472
11473 static inline unsigned
11474 vec_reg (unsigned v, unsigned o)
11475 {
11476   return (v + o) & 0x1F;   /* Vector register numbers wrap modulo 32.  */
11477 }
11478
11479 /* Load multiple N-element structures to N consecutive registers. */
11480 static void
11481 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11482 {
11483 int all = INSTR (30, 30);
11484 unsigned size = INSTR (11, 10);
11485 unsigned vd = INSTR (4, 0);
11486 unsigned i;
11487
11488 switch (size)
11489 {
11490 case 0: /* 8-bit operations. */
11491 if (all)
11492 for (i = 0; i < (16 * N); i++)
11493 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11494 aarch64_get_mem_u8 (cpu, address + i));
11495 else
11496 for (i = 0; i < (8 * N); i++)
11497 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11498 aarch64_get_mem_u8 (cpu, address + i));
11499 return;
11500
11501 case 1: /* 16-bit operations. */
11502 if (all)
11503 for (i = 0; i < (8 * N); i++)
11504 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11505 aarch64_get_mem_u16 (cpu, address + i * 2));
11506 else
11507 for (i = 0; i < (4 * N); i++)
11508 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11509 aarch64_get_mem_u16 (cpu, address + i * 2));
11510 return;
11511
11512 case 2: /* 32-bit operations. */
11513 if (all)
11514 for (i = 0; i < (4 * N); i++)
11515 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11516 aarch64_get_mem_u32 (cpu, address + i * 4));
11517 else
11518 for (i = 0; i < (2 * N); i++)
11519 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11520 aarch64_get_mem_u32 (cpu, address + i * 4));
11521 return;
11522
11523 case 3: /* 64-bit operations. */
11524 if (all)
11525 for (i = 0; i < (2 * N); i++)
11526 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11527 aarch64_get_mem_u64 (cpu, address + i * 8));
11528 else
11529 for (i = 0; i < N; i++)
11530 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11531 aarch64_get_mem_u64 (cpu, address + i * 8));
11532 return;
11533 }
11534 }
11535
11536 /* LD4: load multiple 4-element to four consecutive registers. */
11537 static void
11538 LD4 (sim_cpu *cpu, uint64_t address)
11539 {
11540 vec_load (cpu, address, 4);
11541 }
11542
11543 /* LD3: load multiple 3-element structures to three consecutive registers. */
11544 static void
11545 LD3 (sim_cpu *cpu, uint64_t address)
11546 {
11547 vec_load (cpu, address, 3);
11548 }
11549
11550 /* LD2: load multiple 2-element structures to two consecutive registers. */
11551 static void
11552 LD2 (sim_cpu *cpu, uint64_t address)
11553 {
11554 vec_load (cpu, address, 2);
11555 }
11556
11557 /* Load multiple 1-element structures into one register. */
11558 static void
11559 LD1_1 (sim_cpu *cpu, uint64_t address)
11560 {
11561 int all = INSTR (30, 30);
11562 unsigned size = INSTR (11, 10);
11563 unsigned vd = INSTR (4, 0);
11564 unsigned i;
11565
11566 switch (size)
11567 {
11568 case 0:
11569 /* LD1 {Vd.16b}, addr, #16 */
11570 /* LD1 {Vd.8b}, addr, #8 */
11571 for (i = 0; i < (all ? 16 : 8); i++)
11572 aarch64_set_vec_u8 (cpu, vd, i,
11573 aarch64_get_mem_u8 (cpu, address + i));
11574 return;
11575
11576 case 1:
11577 /* LD1 {Vd.8h}, addr, #16 */
11578 /* LD1 {Vd.4h}, addr, #8 */
11579 for (i = 0; i < (all ? 8 : 4); i++)
11580 aarch64_set_vec_u16 (cpu, vd, i,
11581 aarch64_get_mem_u16 (cpu, address + i * 2));
11582 return;
11583
11584 case 2:
11585 /* LD1 {Vd.4s}, addr, #16 */
11586 /* LD1 {Vd.2s}, addr, #8 */
11587 for (i = 0; i < (all ? 4 : 2); i++)
11588 aarch64_set_vec_u32 (cpu, vd, i,
11589 aarch64_get_mem_u32 (cpu, address + i * 4));
11590 return;
11591
11592 case 3:
11593 /* LD1 {Vd.2d}, addr, #16 */
11594 /* LD1 {Vd.1d}, addr, #8 */
11595 for (i = 0; i < (all ? 2 : 1); i++)
11596 aarch64_set_vec_u64 (cpu, vd, i,
11597 aarch64_get_mem_u64 (cpu, address + i * 8));
11598 return;
11599 }
11600 }
11601
11602 /* Load multiple 1-element structures into two registers. */
11603 static void
11604 LD1_2 (sim_cpu *cpu, uint64_t address)
11605 {
11606   /* FIXME: This is the same code as the LD2 version, but it should not
11607      be: LD2 de-interleaves element pairs across the two registers while
11608      LD1 keeps them consecutive; vec_load omits that interleaving. */
11609 vec_load (cpu, address, 2);
11610 }
11611
11612 /* Load multiple 1-element structures into three registers. */
11613 static void
11614 LD1_3 (sim_cpu *cpu, uint64_t address)
11615 {
11616   /* FIXME: This is the same code as the LD3 version, but it should not
11617      be: LD3 de-interleaves 3-element structures across three registers
11618      while LD1 keeps elements consecutive; the interleaving is missing. */
11619 vec_load (cpu, address, 3);
11620 }
11621
11622 /* Load multiple 1-element structures into four registers. */
11623 static void
11624 LD1_4 (sim_cpu *cpu, uint64_t address)
11625 {
11626   /* FIXME: This is the same code as the LD4 version, but it should not
11627      be: LD4 de-interleaves 4-element structures across four registers
11628      while LD1 keeps elements consecutive; the interleaving is missing. */
11629 vec_load (cpu, address, 4);
11630 }
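
/* For reference: the FIXMEs above exist because the architecture
   de-interleaves LDn across the registers, whereas vec_load fills them
   consecutively.  A sketch of what an interleaving LD2 would look like
   for 8-bit lanes (illustrative only, not used by the simulator):  */

static void
ld2_interleaved_sketch (sim_cpu *cpu, uint64_t address)
{
  int all = INSTR (30, 30);
  unsigned vd = INSTR (4, 0);
  unsigned i;

  for (i = 0; i < (all ? 16u : 8u); i++)
    {
      /* Structure element 0 goes to lane i of Vd,
	 structure element 1 to lane i of V(d+1).  */
      aarch64_set_vec_u8 (cpu, vec_reg (vd, 0), i,
			  aarch64_get_mem_u8 (cpu, address + i * 2));
      aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i,
			  aarch64_get_mem_u8 (cpu, address + i * 2 + 1));
    }
}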
11631
11632 /* Store multiple N-element structures to N consecutive registers. */
11633 static void
11634 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11635 {
11636 int all = INSTR (30, 30);
11637 unsigned size = INSTR (11, 10);
11638 unsigned vd = INSTR (4, 0);
11639 unsigned i;
11640
11641 switch (size)
11642 {
11643 case 0: /* 8-bit operations. */
11644 if (all)
11645 for (i = 0; i < (16 * N); i++)
11646 aarch64_set_mem_u8
11647 (cpu, address + i,
11648 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11649 else
11650 for (i = 0; i < (8 * N); i++)
11651 aarch64_set_mem_u8
11652 (cpu, address + i,
11653 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11654 return;
11655
11656 case 1: /* 16-bit operations. */
11657 if (all)
11658 for (i = 0; i < (8 * N); i++)
11659 aarch64_set_mem_u16
11660 (cpu, address + i * 2,
11661 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11662 else
11663 for (i = 0; i < (4 * N); i++)
11664 aarch64_set_mem_u16
11665 (cpu, address + i * 2,
11666 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11667 return;
11668
11669 case 2: /* 32-bit operations. */
11670 if (all)
11671 for (i = 0; i < (4 * N); i++)
11672 aarch64_set_mem_u32
11673 (cpu, address + i * 4,
11674 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11675 else
11676 for (i = 0; i < (2 * N); i++)
11677 aarch64_set_mem_u32
11678 (cpu, address + i * 4,
11679 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11680 return;
11681
11682 case 3: /* 64-bit operations. */
11683 if (all)
11684 for (i = 0; i < (2 * N); i++)
11685 aarch64_set_mem_u64
11686 (cpu, address + i * 8,
11687 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11688 else
11689 for (i = 0; i < N; i++)
11690 aarch64_set_mem_u64
11691 (cpu, address + i * 8,
11692 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11693 return;
11694 }
11695 }
11696
11697 /* Store multiple 4-element structure to four consecutive registers. */
11698 static void
11699 ST4 (sim_cpu *cpu, uint64_t address)
11700 {
11701 vec_store (cpu, address, 4);
11702 }
11703
11704 /* Store multiple 3-element structures to three consecutive registers. */
11705 static void
11706 ST3 (sim_cpu *cpu, uint64_t address)
11707 {
11708 vec_store (cpu, address, 3);
11709 }
11710
11711 /* Store multiple 2-element structures to two consecutive registers. */
11712 static void
11713 ST2 (sim_cpu *cpu, uint64_t address)
11714 {
11715 vec_store (cpu, address, 2);
11716 }
11717
11718 /* Store multiple 1-element structures into one register. */
11719 static void
11720 ST1_1 (sim_cpu *cpu, uint64_t address)
11721 {
11722 int all = INSTR (30, 30);
11723 unsigned size = INSTR (11, 10);
11724 unsigned vd = INSTR (4, 0);
11725 unsigned i;
11726
11727 switch (size)
11728 {
11729 case 0:
11730 for (i = 0; i < (all ? 16 : 8); i++)
11731 aarch64_set_mem_u8 (cpu, address + i,
11732 aarch64_get_vec_u8 (cpu, vd, i));
11733 return;
11734
11735 case 1:
11736 for (i = 0; i < (all ? 8 : 4); i++)
11737 aarch64_set_mem_u16 (cpu, address + i * 2,
11738 aarch64_get_vec_u16 (cpu, vd, i));
11739 return;
11740
11741 case 2:
11742 for (i = 0; i < (all ? 4 : 2); i++)
11743 aarch64_set_mem_u32 (cpu, address + i * 4,
11744 aarch64_get_vec_u32 (cpu, vd, i));
11745 return;
11746
11747 case 3:
11748 for (i = 0; i < (all ? 2 : 1); i++)
11749 aarch64_set_mem_u64 (cpu, address + i * 8,
11750 aarch64_get_vec_u64 (cpu, vd, i));
11751 return;
11752 }
11753 }
11754
11755 /* Store multiple 1-element structures into two registers. */
11756 static void
11757 ST1_2 (sim_cpu *cpu, uint64_t address)
11758 {
11759   /* FIXME: This is the same code as the ST2 version, but it should not
11760      be: ST2 interleaves element pairs from the two registers while ST1
11761      stores them consecutively; vec_store omits that interleaving. */
11762 vec_store (cpu, address, 2);
11763 }
11764
11765 /* Store multiple 1-element structures into three registers. */
11766 static void
11767 ST1_3 (sim_cpu *cpu, uint64_t address)
11768 {
11769   /* FIXME: This is the same code as the ST3 version, but it should not
11770      be: ST3 interleaves 3-element structures from three registers while
11771      ST1 stores elements consecutively; the interleaving is missing. */
11772 vec_store (cpu, address, 3);
11773 }
11774
11775 /* Store multiple 1-element structures into four registers. */
11776 static void
11777 ST1_4 (sim_cpu *cpu, uint64_t address)
11778 {
11779   /* FIXME: This is the same code as the ST4 version, but it should not
11780      be: ST4 interleaves 4-element structures from four registers while
11781      ST1 stores elements consecutively; the interleaving is missing. */
11782 vec_store (cpu, address, 4);
11783 }
11784
11785 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11786 do \
11787 { \
11788 switch (INSTR (15, 14)) \
11789 { \
11790 case 0: \
11791 lane = (full << 3) | (s << 2) | size; \
11792 size = 0; \
11793 break; \
11794 \
11795 case 1: \
11796 if ((size & 1) == 1) \
11797 HALT_UNALLOC; \
11798 lane = (full << 2) | (s << 1) | (size >> 1); \
11799 size = 1; \
11800 break; \
11801 \
11802 case 2: \
11803 if ((size & 2) == 2) \
11804 HALT_UNALLOC; \
11805 \
11806 if ((size & 1) == 0) \
11807 { \
11808 lane = (full << 1) | s; \
11809 size = 2; \
11810 } \
11811 else \
11812 { \
11813 if (s) \
11814 HALT_UNALLOC; \
11815 lane = full; \
11816 size = 3; \
11817 } \
11818 break; \
11819 \
11820 default: \
11821 HALT_UNALLOC; \
11822 } \
11823 } \
11824 while (0)
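
/* Worked examples of the decode above (for reference; Q = instr[30],
   S = instr[12], size = instr[11,10]):
     opcode 00x : byte lanes,   lane = Q:S:size    (0..15), size -> 0
     opcode 01x : half lanes,   lane = Q:S:size<1> (0..7),  size -> 1
     opcode 10x : word lanes   (size<0> == 0), lane = Q:S (0..3), size -> 2
                  double lanes (size == 01, S == 0), lane = Q (0..1), size -> 3  */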
11825
11826 /* Load single structure into one lane of N registers. */
11827 static void
11828 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11829 {
11830 /* instr[31] = 0
11831 instr[30] = element selector 0=>half, 1=>all elements
11832 instr[29,24] = 00 1101
11833 instr[23] = 0=>simple, 1=>post
11834 instr[22] = 1
11835 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11836 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11837 11111 (immediate post inc)
11838 instr[15,13] = opcode
11839 instr[12] = S, used for lane number
11840 instr[11,10] = size, also used for lane number
11841 instr[9,5] = address
11842 instr[4,0] = Vd */
11843
11844 unsigned full = INSTR (30, 30);
11845 unsigned vd = INSTR (4, 0);
11846 unsigned size = INSTR (11, 10);
11847 unsigned s = INSTR (12, 12);
11848 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11849 int lane = 0;
11850 int i;
11851
11852 NYI_assert (29, 24, 0x0D);
11853 NYI_assert (22, 22, 1);
11854
11855 /* Compute the lane number first (using size), and then compute size. */
11856 LDn_STn_SINGLE_LANE_AND_SIZE ();
11857
11858 for (i = 0; i < nregs; i++)
11859 switch (size)
11860 {
11861 case 0:
11862 {
11863 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11864 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11865 break;
11866 }
11867
11868 case 1:
11869 {
11870 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11871 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11872 break;
11873 }
11874
11875 case 2:
11876 {
11877 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11878 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11879 break;
11880 }
11881
11882 case 3:
11883 {
11884 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11885 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11886 break;
11887 }
11888 }
11889 }
11890
11891 /* Store single structure from one lane from N registers. */
11892 static void
11893 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11894 {
11895 /* instr[31] = 0
11896 instr[30] = element selector 0=>half, 1=>all elements
11897 instr[29,24] = 00 1101
11898 instr[23] = 0=>simple, 1=>post
11899 instr[22] = 0
11900 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11901 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11902 11111 (immediate post inc)
11903 instr[15,13] = opcode
11904 instr[12] = S, used for lane number
11905 instr[11,10] = size, also used for lane number
11906 instr[9,5] = address
11907 instr[4,0] = Vd */
11908
11909 unsigned full = INSTR (30, 30);
11910 unsigned vd = INSTR (4, 0);
11911 unsigned size = INSTR (11, 10);
11912 unsigned s = INSTR (12, 12);
11913 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11914 int lane = 0;
11915 int i;
11916
11917 NYI_assert (29, 24, 0x0D);
11918 NYI_assert (22, 22, 0);
11919
11920 /* Compute the lane number first (using size), and then compute size. */
11921 LDn_STn_SINGLE_LANE_AND_SIZE ();
11922
11923 for (i = 0; i < nregs; i++)
11924 switch (size)
11925 {
11926 case 0:
11927 {
11928 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11929 aarch64_set_mem_u8 (cpu, address + i, val);
11930 break;
11931 }
11932
11933 case 1:
11934 {
11935 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11936 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11937 break;
11938 }
11939
11940 case 2:
11941 {
11942 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11943 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11944 break;
11945 }
11946
11947 case 3:
11948 {
11949 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11950 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11951 break;
11952 }
11953 }
11954 }
11955
11956 /* Load single structure into all lanes of N registers. */
11957 static void
11958 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11959 {
11960 /* instr[31] = 0
11961 instr[30] = element selector 0=>half, 1=>all elements
11962 instr[29,24] = 00 1101
11963 instr[23] = 0=>simple, 1=>post
11964 instr[22] = 1
11965 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11966 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11967 11111 (immediate post inc)
11968 instr[15,14] = 11
11969 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11970 instr[12] = 0
11971 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11972 10=> word(s), 11=> double(d)
11973 instr[9,5] = address
11974 instr[4,0] = Vd */
11975
11976 unsigned full = INSTR (30, 30);
11977 unsigned vd = INSTR (4, 0);
11978 unsigned size = INSTR (11, 10);
11979 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11980 int i, n;
11981
11982 NYI_assert (29, 24, 0x0D);
11983 NYI_assert (22, 22, 1);
11984 NYI_assert (15, 14, 3);
11985 NYI_assert (12, 12, 0);
11986
11987 for (n = 0; n < nregs; n++)
11988 switch (size)
11989 {
11990 case 0:
11991 {
11992 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11993 for (i = 0; i < (full ? 16 : 8); i++)
11994 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11995 break;
11996 }
11997
11998 case 1:
11999 {
12000 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
12001 for (i = 0; i < (full ? 8 : 4); i++)
12002 aarch64_set_vec_u16 (cpu, vd + n, i, val);
12003 break;
12004 }
12005
12006 case 2:
12007 {
12008 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
12009 for (i = 0; i < (full ? 4 : 2); i++)
12010 aarch64_set_vec_u32 (cpu, vd + n, i, val);
12011 break;
12012 }
12013
12014 case 3:
12015 {
12016 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
12017 for (i = 0; i < (full ? 2 : 1); i++)
12018 aarch64_set_vec_u64 (cpu, vd + n, i, val);
12019 break;
12020 }
12021
12022 default:
12023 HALT_UNALLOC;
12024 }
12025 }
12026
12027 static void
12028 do_vec_load_store (sim_cpu *cpu)
12029 {
12030 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
12031
12032 instr[31] = 0
12033 instr[30] = element selector 0=>half, 1=>all elements
12034 instr[29,25] = 00110
12035 instr[24] = 0=>multiple struct, 1=>single struct
12036 instr[23] = 0=>simple, 1=>post
12037 instr[22] = 0=>store, 1=>load
12038 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12039 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12040 11111 (immediate post inc)
12041 instr[15,12] = elements and destinations. eg for load:
12042 0000=>LD4 => load multiple 4-element to
12043 four consecutive registers
12044 0100=>LD3 => load multiple 3-element to
12045 three consecutive registers
12046 1000=>LD2 => load multiple 2-element to
12047 two consecutive registers
12048 0010=>LD1 => load multiple 1-element to
12049 four consecutive registers
12050 0110=>LD1 => load multiple 1-element to
12051 three consecutive registers
12052 1010=>LD1 => load multiple 1-element to
12053 two consecutive registers
12054 0111=>LD1 => load multiple 1-element to
12055 one register
12056                          1100=>LD1R,LD2R
12057                          1110=>LD3R,LD4R
12058 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12059 10=> word(s), 11=> double(d)
12060 instr[9,5] = Vn, can be SP
12061 instr[4,0] = Vd */
12062
12063 int single;
12064 int post;
12065 int load;
12066 unsigned vn;
12067 uint64_t address;
12068 int type;
12069
12070 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12071 HALT_NYI;
12072
12073 single = INSTR (24, 24);
12074 post = INSTR (23, 23);
12075 load = INSTR (22, 22);
12076 type = INSTR (15, 12);
12077 vn = INSTR (9, 5);
12078 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12079
12080 if (! single && INSTR (21, 21) != 0)
12081 HALT_UNALLOC;
12082
12083 if (post)
12084 {
12085 unsigned vm = INSTR (20, 16);
12086
12087 if (vm == R31)
12088 {
12089 unsigned sizeof_operation;
12090
12091 if (single)
12092 {
12093 if ((type >= 0) && (type <= 11))
12094 {
12095 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12096 switch (INSTR (15, 14))
12097 {
12098 case 0:
12099 sizeof_operation = nregs * 1;
12100 break;
12101 case 1:
12102 sizeof_operation = nregs * 2;
12103 break;
12104 case 2:
12105 if (INSTR (10, 10) == 0)
12106 sizeof_operation = nregs * 4;
12107 else
12108 sizeof_operation = nregs * 8;
12109 break;
12110 default:
12111 HALT_UNALLOC;
12112 }
12113 }
12114 else if (type == 0xC)
12115 {
12116 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12117 sizeof_operation <<= INSTR (11, 10);
12118 }
12119 else if (type == 0xE)
12120 {
12121 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12122 sizeof_operation <<= INSTR (11, 10);
12123 }
12124 else
12125 HALT_UNALLOC;
12126 }
12127 else
12128 {
12129 switch (type)
12130 {
12131 case 0: sizeof_operation = 32; break;
12132 case 4: sizeof_operation = 24; break;
12133 case 8: sizeof_operation = 16; break;
12134
12135 case 7:
12136 /* One register, immediate offset variant. */
12137 sizeof_operation = 8;
12138 break;
12139
12140 case 10:
12141 /* Two registers, immediate offset variant. */
12142 sizeof_operation = 16;
12143 break;
12144
12145 case 6:
12146 /* Three registers, immediate offset variant. */
12147 sizeof_operation = 24;
12148 break;
12149
12150 case 2:
12151 /* Four registers, immediate offset variant. */
12152 sizeof_operation = 32;
12153 break;
12154
12155 default:
12156 HALT_UNALLOC;
12157 }
12158
12159 if (INSTR (30, 30))
12160 sizeof_operation *= 2;
12161 }
12162
12163 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12164 }
12165 else
12166 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12167 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12168 }
12169 else
12170 {
12171 NYI_assert (20, 16, 0);
12172 }
12173
12174 if (single)
12175 {
12176 if (load)
12177 {
12178 if ((type >= 0) && (type <= 11))
12179 do_vec_LDn_single (cpu, address);
12180 else if ((type == 0xC) || (type == 0xE))
12181 do_vec_LDnR (cpu, address);
12182 else
12183 HALT_UNALLOC;
12184 return;
12185 }
12186
12187 /* Stores. */
12188 if ((type >= 0) && (type <= 11))
12189 {
12190 do_vec_STn_single (cpu, address);
12191 return;
12192 }
12193
12194 HALT_UNALLOC;
12195 }
12196
12197 if (load)
12198 {
12199 switch (type)
12200 {
12201 case 0: LD4 (cpu, address); return;
12202 case 4: LD3 (cpu, address); return;
12203 case 8: LD2 (cpu, address); return;
12204 case 2: LD1_4 (cpu, address); return;
12205 case 6: LD1_3 (cpu, address); return;
12206 case 10: LD1_2 (cpu, address); return;
12207 case 7: LD1_1 (cpu, address); return;
12208
12209 default:
12210 HALT_UNALLOC;
12211 }
12212 }
12213
12214 /* Stores. */
12215 switch (type)
12216 {
12217 case 0: ST4 (cpu, address); return;
12218 case 4: ST3 (cpu, address); return;
12219 case 8: ST2 (cpu, address); return;
12220 case 2: ST1_4 (cpu, address); return;
12221 case 6: ST1_3 (cpu, address); return;
12222 case 10: ST1_2 (cpu, address); return;
12223 case 7: ST1_1 (cpu, address); return;
12224 default:
12225 HALT_UNALLOC;
12226 }
12227 }
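
/* Worked example of the post-increment sizing above (for reference):
   LD1 {V0.16B}, [X0], #16 is type 0111 (one register, immediate
   offset), so sizeof_operation = 8, doubled to 16 because instr[30]
   selects the full 128-bit register.  */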
12228
12229 static void
12230 dexLdSt (sim_cpu *cpu)
12231 {
12232 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12233 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12234 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12235 bits [29,28:26] of a LS are the secondary dispatch vector. */
12236 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12237
12238 switch (group2)
12239 {
12240 case LS_EXCL_000:
12241 dexLoadExclusive (cpu); return;
12242
12243 case LS_LIT_010:
12244 case LS_LIT_011:
12245 dexLoadLiteral (cpu); return;
12246
12247 case LS_OTHER_110:
12248 case LS_OTHER_111:
12249 dexLoadOther (cpu); return;
12250
12251 case LS_ADVSIMD_001:
12252 do_vec_load_store (cpu); return;
12253
12254 case LS_PAIR_100:
12255 dex_load_store_pair_gr (cpu); return;
12256
12257 case LS_PAIR_101:
12258 dex_load_store_pair_fp (cpu); return;
12259
12260 default:
12261 /* Should never reach here. */
12262 HALT_NYI;
12263 }
12264 }
12265
12266 /* Specific decode and execute for group Data Processing Register. */
12267
12268 static void
12269 dexLogicalShiftedRegister (sim_cpu *cpu)
12270 {
12271 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12272 instr[30,29] = op
12273 instr[28:24] = 01010
12274 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12275 instr[21] = N
12276 instr[20,16] = Rm
12277 instr[15,10] = count : must be 0xxxxx for 32 bit
12278 instr[9,5] = Rn
12279 instr[4,0] = Rd */
12280
12281 uint32_t size = INSTR (31, 31);
12282 Shift shiftType = INSTR (23, 22);
12283 uint32_t count = INSTR (15, 10);
12284
12285   /* 32 bit operations must have count[5] = 0
12286      or else we have an UNALLOC. */
12287 if (size == 0 && uimm (count, 5, 5))
12288 HALT_UNALLOC;
12289
12290 /* Dispatch on size:op:N. */
12291 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12292 {
12293 case 0: and32_shift (cpu, shiftType, count); return;
12294 case 1: bic32_shift (cpu, shiftType, count); return;
12295 case 2: orr32_shift (cpu, shiftType, count); return;
12296 case 3: orn32_shift (cpu, shiftType, count); return;
12297 case 4: eor32_shift (cpu, shiftType, count); return;
12298 case 5: eon32_shift (cpu, shiftType, count); return;
12299 case 6: ands32_shift (cpu, shiftType, count); return;
12300 case 7: bics32_shift (cpu, shiftType, count); return;
12301 case 8: and64_shift (cpu, shiftType, count); return;
12302 case 9: bic64_shift (cpu, shiftType, count); return;
12303     case 10: orr64_shift  (cpu, shiftType, count); return;
12304     case 11: orn64_shift  (cpu, shiftType, count); return;
12305     case 12: eor64_shift  (cpu, shiftType, count); return;
12306     case 13: eon64_shift  (cpu, shiftType, count); return;
12307     case 14: ands64_shift (cpu, shiftType, count); return;
12308     case 15: bics64_shift (cpu, shiftType, count); return;
12309 }
12310 }
12311
12312 /* 32 bit conditional select. */
12313 static void
12314 csel32 (sim_cpu *cpu, CondCode cc)
12315 {
12316 unsigned rm = INSTR (20, 16);
12317 unsigned rn = INSTR (9, 5);
12318 unsigned rd = INSTR (4, 0);
12319
12320 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12321 testConditionCode (cpu, cc)
12322 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12323 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12324 }
12325
12326 /* 64 bit conditional select. */
12327 static void
12328 csel64 (sim_cpu *cpu, CondCode cc)
12329 {
12330 unsigned rm = INSTR (20, 16);
12331 unsigned rn = INSTR (9, 5);
12332 unsigned rd = INSTR (4, 0);
12333
12334 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12335 testConditionCode (cpu, cc)
12336 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12337 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12338 }
12339
12340 /* 32 bit conditional increment. */
12341 static void
12342 csinc32 (sim_cpu *cpu, CondCode cc)
12343 {
12344 unsigned rm = INSTR (20, 16);
12345 unsigned rn = INSTR (9, 5);
12346 unsigned rd = INSTR (4, 0);
12347
12348 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12349 testConditionCode (cpu, cc)
12350 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12351 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12352 }
12353
12354 /* 64 bit conditional increment. */
12355 static void
12356 csinc64 (sim_cpu *cpu, CondCode cc)
12357 {
12358 unsigned rm = INSTR (20, 16);
12359 unsigned rn = INSTR (9, 5);
12360 unsigned rd = INSTR (4, 0);
12361
12362 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12363 testConditionCode (cpu, cc)
12364 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12365 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12366 }
12367
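/* N.B. the conditional select family also backs the architected
   aliases CSET, CSETM, CINC, CINV and CNEG; e.g. CSET Wd, cc is
   just CSINC Wd, WZR, WZR with the condition inverted, so the
   CSEL/CSINC/CSINV/CSNEG cases decoded in dexCondSelect below
   cover those aliases for free.  */
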
12368 /* 32 bit conditional invert. */
12369 static void
12370 csinv32 (sim_cpu *cpu, CondCode cc)
12371 {
12372 unsigned rm = INSTR (20, 16);
12373 unsigned rn = INSTR (9, 5);
12374 unsigned rd = INSTR (4, 0);
12375
12376 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12377 testConditionCode (cpu, cc)
12378 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12379 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12380 }
12381
12382 /* 64 bit conditional invert. */
12383 static void
12384 csinv64 (sim_cpu *cpu, CondCode cc)
12385 {
12386 unsigned rm = INSTR (20, 16);
12387 unsigned rn = INSTR (9, 5);
12388 unsigned rd = INSTR (4, 0);
12389
12390 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12391 testConditionCode (cpu, cc)
12392 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12393 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12394 }
12395
12396 /* 32 bit conditional negate. */
12397 static void
12398 csneg32 (sim_cpu *cpu, CondCode cc)
12399 {
12400 unsigned rm = INSTR (20, 16);
12401 unsigned rn = INSTR (9, 5);
12402 unsigned rd = INSTR (4, 0);
12403
12404 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12405 testConditionCode (cpu, cc)
12406 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12407 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12408 }
12409
12410 /* 64 bit conditional negate. */
12411 static void
12412 csneg64 (sim_cpu *cpu, CondCode cc)
12413 {
12414 unsigned rm = INSTR (20, 16);
12415 unsigned rn = INSTR (9, 5);
12416 unsigned rd = INSTR (4, 0);
12417
12418 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12419 testConditionCode (cpu, cc)
12420 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12421 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12422 }
12423
12424 static void
12425 dexCondSelect (sim_cpu *cpu)
12426 {
12427 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12428 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12429 100 ==> CSINV, 101 ==> CSNEG,
12430 _1_ ==> UNALLOC
12431 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12432 instr[28,21] = 11010100
12433 instr[20,16] = Rm
12434 instr[15,12] = cond */
12435
12436 CondCode cc = INSTR (15, 12);
12437 uint32_t S = INSTR (29, 29);
12438 uint32_t op2 = INSTR (11, 10);
12439
12440 if (S == 1)
12441 HALT_UNALLOC;
12442
12443 if (op2 & 0x2)
12444 HALT_UNALLOC;
12445
12446 switch ((INSTR (31, 30) << 1) | op2)
12447 {
12448 case 0: csel32 (cpu, cc); return;
12449 case 1: csinc32 (cpu, cc); return;
12450 case 2: csinv32 (cpu, cc); return;
12451 case 3: csneg32 (cpu, cc); return;
12452 case 4: csel64 (cpu, cc); return;
12453 case 5: csinc64 (cpu, cc); return;
12454 case 6: csinv64 (cpu, cc); return;
12455 case 7: csneg64 (cpu, cc); return;
12456 }
12457 }
12458
12459 /* Some helpers for counting leading 1 or 0 bits. */
12460
12461 /* Counts the number of leading bits which are the same
12462 in a 32 bit value; the result is in the range 1 to 32.  */
12463 static uint32_t
12464 leading32 (uint32_t value)
12465 {
12466 int32_t mask = 0xffff0000;
12467 uint32_t count = 16; /* Counts number of bits set in mask. */
12468 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12469 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12470
12471 while (lo + 1 < hi)
12472 {
12473 int32_t test = (value & mask);
12474
12475 if (test == 0 || test == mask)
12476 {
12477 lo = count;
12478 count = (lo + hi) / 2;
12479 mask >>= (count - lo);
12480 }
12481 else
12482 {
12483 hi = count;
12484 count = (lo + hi) / 2;
12485 mask <<= hi - count;
12486 }
12487 }
12488
12489 if (lo != hi)
12490 {
12491 int32_t test;
12492
12493 mask >>= 1;
12494 test = (value & mask);
12495
12496 if (test == 0 || test == mask)
12497 count = hi;
12498 else
12499 count = lo;
12500 }
12501
12502 return count;
12503 }
12504
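/* As an informal example of the binary search above: for the value
   0x0000ffff the run of identical leading bits is the 16 zeros, so
   leading32 returns 16, giving a CLZ result of 16 and (after the
   caller subtracts the leading bit itself) a CLS result of 15.  */
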
12505 /* Counts the number of leading bits which are the same
12506 in a 64 bit value; the result is in the range 1 to 64.  */
12507 static uint64_t
12508 leading64 (uint64_t value)
12509 {
12510 int64_t mask = 0xffffffff00000000LL;
12511 uint64_t count = 32; /* Counts number of bits set in mask. */
12512 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12513 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12514
12515 while (lo + 1 < hi)
12516 {
12517 int64_t test = (value & mask);
12518
12519 if (test == 0 || test == mask)
12520 {
12521 lo = count;
12522 count = (lo + hi) / 2;
12523 mask >>= (count - lo);
12524 }
12525 else
12526 {
12527 hi = count;
12528 count = (lo + hi) / 2;
12529 mask <<= hi - count;
12530 }
12531 }
12532
12533 if (lo != hi)
12534 {
12535 int64_t test;
12536
12537 mask >>= 1;
12538 test = (value & mask);
12539
12540 if (test == 0 || test == mask)
12541 count = hi;
12542 else
12543 count = lo;
12544 }
12545
12546 return count;
12547 }
12548
12549 /* Bit operations. */
12550 /* N.B. register args may not be SP.  */
12551
12552 /* 32 bit count leading sign bits. */
12553 static void
12554 cls32 (sim_cpu *cpu)
12555 {
12556 unsigned rn = INSTR (9, 5);
12557 unsigned rd = INSTR (4, 0);
12558
12559 /* N.B. the result needs to exclude the leading bit. */
12560 aarch64_set_reg_u64
12561 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12562 }
12563
12564 /* 64 bit count leading sign bits. */
12565 static void
12566 cls64 (sim_cpu *cpu)
12567 {
12568 unsigned rn = INSTR (9, 5);
12569 unsigned rd = INSTR (4, 0);
12570
12571 /* N.B. the result needs to exclude the leading bit. */
12572 aarch64_set_reg_u64
12573 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12574 }
12575
12576 /* 32 bit count leading zero bits. */
12577 static void
12578 clz32 (sim_cpu *cpu)
12579 {
12580 unsigned rn = INSTR (9, 5);
12581 unsigned rd = INSTR (4, 0);
12582 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12583
12584 /* if the sign (top) bit is set then the count is 0. */
12585 if (pick32 (value, 31, 31))
12586 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12587 else
12588 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12589 }
12590
12591 /* 64 bit count leading zero bits. */
12592 static void
12593 clz64 (sim_cpu *cpu)
12594 {
12595 unsigned rn = INSTR (9, 5);
12596 unsigned rd = INSTR (4, 0);
12597 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12598
12599 /* if the sign (top) bit is set then the count is 0. */
12600 if (pick64 (value, 63, 63))
12601 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12602 else
12603 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12604 }
12605
12606 /* 32 bit reverse bits. */
12607 static void
12608 rbit32 (sim_cpu *cpu)
12609 {
12610 unsigned rn = INSTR (9, 5);
12611 unsigned rd = INSTR (4, 0);
12612 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12613 uint32_t result = 0;
12614 int i;
12615
12616 for (i = 0; i < 32; i++)
12617 {
12618 result <<= 1;
12619 result |= (value & 1);
12620 value >>= 1;
12621 }
12622 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12623 }
12624
12625 /* 64 bit reverse bits. */
12626 static void
12627 rbit64 (sim_cpu *cpu)
12628 {
12629 unsigned rn = INSTR (9, 5);
12630 unsigned rd = INSTR (4, 0);
12631 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12632 uint64_t result = 0;
12633 int i;
12634
12635 for (i = 0; i < 64; i++)
12636 {
12637 result <<= 1;
12638 result |= (value & 1UL);
12639 value >>= 1;
12640 }
12641 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12642 }
12643
12644 /* 32 bit reverse bytes. */
12645 static void
12646 rev32 (sim_cpu *cpu)
12647 {
12648 unsigned rn = INSTR (9, 5);
12649 unsigned rd = INSTR (4, 0);
12650 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12651 uint32_t result = 0;
12652 int i;
12653
12654 for (i = 0; i < 4; i++)
12655 {
12656 result <<= 8;
12657 result |= (value & 0xff);
12658 value >>= 8;
12659 }
12660 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12661 }
12662
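/* 64 bit reverse bytes within each word.  This is the 64 bit form
   of REV32: a minimal sketch added here because reusing rev32 for
   dispatch case 10 below would zero the upper word instead of
   byte-swapping it.  */
static void
rev32_64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  /* Accumulate one byte of each 32 bit word per iteration, in the
     same masked-shift style as revh64 below, so bytes never cross
     their 32 bit lane.  */
  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
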
12663 /* 64 bit reverse bytes. */
12664 static void
12665 rev64 (sim_cpu *cpu)
12666 {
12667 unsigned rn = INSTR (9, 5);
12668 unsigned rd = INSTR (4, 0);
12669 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12670 uint64_t result = 0;
12671 int i;
12672
12673 for (i = 0; i < 8; i++)
12674 {
12675 result <<= 8;
12676 result |= (value & 0xffULL);
12677 value >>= 8;
12678 }
12679 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12680 }
12681
12682 /* 32 bit reverse shorts. */
12683 /* N.B. this reverses the order of the bytes in each half word.  */
12684 static void
12685 revh32 (sim_cpu *cpu)
12686 {
12687 unsigned rn = INSTR (9, 5);
12688 unsigned rd = INSTR (4, 0);
12689 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12690 uint32_t result = 0;
12691 int i;
12692
12693 for (i = 0; i < 2; i++)
12694 {
12695 result <<= 8;
12696 result |= (value & 0x00ff00ff);
12697 value >>= 8;
12698 }
12699 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12700 }
12701
12702 /* 64 bit reverse shorts. */
12703 /* N.B. this reverses the order of the bytes in each half word.  */
12704 static void
12705 revh64 (sim_cpu *cpu)
12706 {
12707 unsigned rn = INSTR (9, 5);
12708 unsigned rd = INSTR (4, 0);
12709 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12710 uint64_t result = 0;
12711 int i;
12712
12713 for (i = 0; i < 2; i++)
12714 {
12715 result <<= 8;
12716 result |= (value & 0x00ff00ff00ff00ffULL);
12717 value >>= 8;
12718 }
12719 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12720 }
12721
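/* For example revh32 maps 0xAABBCCDD to 0xBBAADDCC: the 0x00ff00ff
   mask lets each pass move one byte within its 16 bit lane without
   leaking bytes across lane boundaries.  */
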
12722 static void
12723 dexDataProc1Source (sim_cpu *cpu)
12724 {
12725 /* instr[30] = 1
12726 instr[28,21] = 11010110
12727 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12728 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12729 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12730 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12731 000010 ==> REV (REV32 for size == 1), 000011 ==> REV (64 bit) ow UNALLOC
12732 000100 ==> CLZ, 000101 ==> CLS
12733 ow ==> UNALLOC
12734 instr[9,5] = rn : may not be SP
12735 instr[4,0] = rd : may not be SP. */
12736
12737 uint32_t S = INSTR (29, 29);
12738 uint32_t opcode2 = INSTR (20, 16);
12739 uint32_t opcode = INSTR (15, 10);
12740 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12741
12742 if (S == 1)
12743 HALT_UNALLOC;
12744
12745 if (opcode2 != 0)
12746 HALT_UNALLOC;
12747
12748 if (opcode & 0x38)
12749 HALT_UNALLOC;
12750
12751 switch (dispatch)
12752 {
12753 case 0: rbit32 (cpu); return;
12754 case 1: revh32 (cpu); return;
12755 case 2: rev32 (cpu); return;
12756 case 4: clz32 (cpu); return;
12757 case 5: cls32 (cpu); return;
12758 case 8: rbit64 (cpu); return;
12759 case 9: revh64 (cpu); return;
12760 case 10: rev32_64 (cpu); return;
12761 case 11: rev64 (cpu); return;
12762 case 12: clz64 (cpu); return;
12763 case 13: cls64 (cpu); return;
12764 default: HALT_UNALLOC;
12765 }
12766 }
12767
12768 /* Variable shift.
12769 Shifts by count supplied in register.
12770 N.B. register args may not be SP.
12771 These all use the shifted auxiliary function for
12772 simplicity and clarity. Writing the actual shift
12773 inline would avoid a branch and so be faster but
12774 would also necessitate getting signs right. */
12775
12776 /* 32 bit arithmetic shift right. */
12777 static void
12778 asrv32 (sim_cpu *cpu)
12779 {
12780 unsigned rm = INSTR (20, 16);
12781 unsigned rn = INSTR (9, 5);
12782 unsigned rd = INSTR (4, 0);
12783
12784 aarch64_set_reg_u64
12785 (cpu, rd, NO_SP,
12786 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12787 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12788 }
12789
12790 /* 64 bit arithmetic shift right. */
12791 static void
12792 asrv64 (sim_cpu *cpu)
12793 {
12794 unsigned rm = INSTR (20, 16);
12795 unsigned rn = INSTR (9, 5);
12796 unsigned rd = INSTR (4, 0);
12797
12798 aarch64_set_reg_u64
12799 (cpu, rd, NO_SP,
12800 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12801 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12802 }
12803
12804 /* 32 bit logical shift left. */
12805 static void
12806 lslv32 (sim_cpu *cpu)
12807 {
12808 unsigned rm = INSTR (20, 16);
12809 unsigned rn = INSTR (9, 5);
12810 unsigned rd = INSTR (4, 0);
12811
12812 aarch64_set_reg_u64
12813 (cpu, rd, NO_SP,
12814 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12815 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12816 }
12817
12818 /* 64 bit logical shift left.  */
12819 static void
12820 lslv64 (sim_cpu *cpu)
12821 {
12822 unsigned rm = INSTR (20, 16);
12823 unsigned rn = INSTR (9, 5);
12824 unsigned rd = INSTR (4, 0);
12825
12826 aarch64_set_reg_u64
12827 (cpu, rd, NO_SP,
12828 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12829 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12830 }
12831
12832 /* 32 bit logical shift right. */
12833 static void
12834 lsrv32 (sim_cpu *cpu)
12835 {
12836 unsigned rm = INSTR (20, 16);
12837 unsigned rn = INSTR (9, 5);
12838 unsigned rd = INSTR (4, 0);
12839
12840 aarch64_set_reg_u64
12841 (cpu, rd, NO_SP,
12842 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12843 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12844 }
12845
12846 /* 64 bit logical shift right. */
12847 static void
12848 lsrv64 (sim_cpu *cpu)
12849 {
12850 unsigned rm = INSTR (20, 16);
12851 unsigned rn = INSTR (9, 5);
12852 unsigned rd = INSTR (4, 0);
12853
12854 aarch64_set_reg_u64
12855 (cpu, rd, NO_SP,
12856 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12857 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12858 }
12859
12860 /* 32 bit rotate right. */
12861 static void
12862 rorv32 (sim_cpu *cpu)
12863 {
12864 unsigned rm = INSTR (20, 16);
12865 unsigned rn = INSTR (9, 5);
12866 unsigned rd = INSTR (4, 0);
12867
12868 aarch64_set_reg_u64
12869 (cpu, rd, NO_SP,
12870 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12871 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12872 }
12873
12874 /* 64 bit rotate right. */
12875 static void
12876 rorv64 (sim_cpu *cpu)
12877 {
12878 unsigned rm = INSTR (20, 16);
12879 unsigned rn = INSTR (9, 5);
12880 unsigned rd = INSTR (4, 0);
12881
12882 aarch64_set_reg_u64
12883 (cpu, rd, NO_SP,
12884 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12885 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12886 }
12887
12888
12889 /* Divide.  */
12890
12891 /* 32 bit signed divide.  */
12892 static void
12893 sdiv32 (sim_cpu *cpu)
12894 {
12895 unsigned rm = INSTR (20, 16);
12896 unsigned rn = INSTR (9, 5);
12897 unsigned rd = INSTR (4, 0);
12898 /* N.B. the pseudo-code does the divide using 64 bit data. */
12899 /* TODO : check that this rounds towards zero as required. */
12900 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12901 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12902
12903 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12904 divisor ? ((int32_t) (dividend / divisor)) : 0);
12905 }
12906
12907 /* 64 bit signed divide. */
12908 static void
12909 sdiv64 (sim_cpu *cpu)
12910 {
12911 unsigned rm = INSTR (20, 16);
12912 unsigned rn = INSTR (9, 5);
12913 unsigned rd = INSTR (4, 0);
12914
12915 /* TODO : check that this rounds towards zero as required. */
12916 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12917
12918 aarch64_set_reg_s64
12919 (cpu, rd, NO_SP,
12920 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12921 }
12922
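/* One caveat worth flagging for the signed divides above (an
   observation, not something the original code states): AArch64
   defines SDIV of INT64_MIN by -1 as wrapping to INT64_MIN, but the
   host division performed here has undefined behaviour for exactly
   that operand pair and may trap.  The 32 bit case is done in 64 bit
   arithmetic before truncation, which yields the architected result
   on two's complement hosts.  */
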
12923 /* 32 bit unsigned divide. */
12924 static void
12925 udiv32 (sim_cpu *cpu)
12926 {
12927 unsigned rm = INSTR (20, 16);
12928 unsigned rn = INSTR (9, 5);
12929 unsigned rd = INSTR (4, 0);
12930
12931 /* N.B. the pseudo-code does the divide using 64 bit data. */
12932 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12933 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12934
12935 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12936 divisor ? (uint32_t) (dividend / divisor) : 0);
12937 }
12938
12939 /* 64 bit unsigned divide. */
12940 static void
12941 udiv64 (sim_cpu *cpu)
12942 {
12943 unsigned rm = INSTR (20, 16);
12944 unsigned rn = INSTR (9, 5);
12945 unsigned rd = INSTR (4, 0);
12946
12947 /* TODO : check that this rounds towards zero as required. */
12948 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12949
12950 aarch64_set_reg_u64
12951 (cpu, rd, NO_SP,
12952 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12953 }
12954
12955 static void
12956 dexDataProc2Source (sim_cpu *cpu)
12957 {
12958 /* assert instr[30] == 0
12959 instr[28,21] == 11010110
12960 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12961 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12962 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12963 001000 ==> LSLV, 001001 ==> LSRV
12964 001010 ==> ASRV, 001011 ==> RORV
12965 ow ==> UNALLOC. */
12966
12967 uint32_t dispatch;
12968 uint32_t S = INSTR (29, 29);
12969 uint32_t opcode = INSTR (15, 10);
12970
12971 if (S == 1)
12972 HALT_UNALLOC;
12973
12974 if (opcode & 0x34)
12975 HALT_UNALLOC;
12976
12977 dispatch = ( (INSTR (31, 31) << 3)
12978 | (uimm (opcode, 3, 3) << 2)
12979 | uimm (opcode, 1, 0));
12980 switch (dispatch)
12981 {
12982 case 2: udiv32 (cpu); return;
12983 case 3: sdiv32 (cpu); return;
12984 case 4: lslv32 (cpu); return;
12985 case 5: lsrv32 (cpu); return;
12986 case 6: asrv32 (cpu); return;
12987 case 7: rorv32 (cpu); return;
12988 case 10: udiv64 (cpu); return;
12989 case 11: sdiv64 (cpu); return;
12990 case 12: lslv64 (cpu); return;
12991 case 13: lsrv64 (cpu); return;
12992 case 14: asrv64 (cpu); return;
12993 case 15: rorv64 (cpu); return;
12994 default: HALT_UNALLOC;
12995 }
12996 }
12997
12998
12999 /* Multiply. */
13000
13001 /* 32 bit multiply and add. */
13002 static void
13003 madd32 (sim_cpu *cpu)
13004 {
13005 unsigned rm = INSTR (20, 16);
13006 unsigned ra = INSTR (14, 10);
13007 unsigned rn = INSTR (9, 5);
13008 unsigned rd = INSTR (4, 0);
13009
13010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13011 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13012 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13013 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
13014 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13015 }
13016
13017 /* 64 bit multiply and add. */
13018 static void
13019 madd64 (sim_cpu *cpu)
13020 {
13021 unsigned rm = INSTR (20, 16);
13022 unsigned ra = INSTR (14, 10);
13023 unsigned rn = INSTR (9, 5);
13024 unsigned rd = INSTR (4, 0);
13025
13026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13027 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13028 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13029 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
13030 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13031 }
13032
13033 /* 32 bit multiply and sub. */
13034 static void
13035 msub32 (sim_cpu *cpu)
13036 {
13037 unsigned rm = INSTR (20, 16);
13038 unsigned ra = INSTR (14, 10);
13039 unsigned rn = INSTR (9, 5);
13040 unsigned rd = INSTR (4, 0);
13041
13042 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13043 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13044 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13045 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13046 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13047 }
13048
13049 /* 64 bit multiply and sub. */
13050 static void
13051 msub64 (sim_cpu *cpu)
13052 {
13053 unsigned rm = INSTR (20, 16);
13054 unsigned ra = INSTR (14, 10);
13055 unsigned rn = INSTR (9, 5);
13056 unsigned rd = INSTR (4, 0);
13057
13058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13059 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13060 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13061 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13062 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13063 }
13064
13065 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13066 static void
13067 smaddl (sim_cpu *cpu)
13068 {
13069 unsigned rm = INSTR (20, 16);
13070 unsigned ra = INSTR (14, 10);
13071 unsigned rn = INSTR (9, 5);
13072 unsigned rd = INSTR (4, 0);
13073
13074 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13075 obtain a 64 bit product. */
13076 aarch64_set_reg_s64
13077 (cpu, rd, NO_SP,
13078 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13079 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13080 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13081 }
13082
13083 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13084 static void
13085 smsubl (sim_cpu *cpu)
13086 {
13087 unsigned rm = INSTR (20, 16);
13088 unsigned ra = INSTR (14, 10);
13089 unsigned rn = INSTR (9, 5);
13090 unsigned rd = INSTR (4, 0);
13091
13092 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13093 obtain a 64 bit product. */
13094 aarch64_set_reg_s64
13095 (cpu, rd, NO_SP,
13096 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13097 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13098 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13099 }
13100
13101 /* Integer Multiply/Divide. */
13102
13103 /* First some macros and a helper function. */
13104 /* Macros to test or access elements of 64 bit words. */
13105
13106 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13107 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13108 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13109 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13110 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13111 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13112
13113 /* Offset of sign bit in 64 bit signed integer.  */
13114 #define SIGN_SHIFT_U64 63
13115 /* The sign bit itself -- also identifies the minimum negative int value. */
13116 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
13117 /* Return true if a 64 bit signed int presented as an unsigned int is the
13118 most negative value. */
13119 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13120 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13121 int has its sign bit set.  */
13122 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13123 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13124 an unsigned int has its sign bit set or not. */
13125 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13126 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13127 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13128
13129 /* Multiply two 64 bit ints and return
13130 the hi 64 bits of the 128 bit product.  */
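/* The computation below is the schoolbook decomposition
     (2^32*a + b) * (2^32*c + d) = 2^64*a*c + 2^32*(a*d + b*c) + b*d
   with the middle partial products accumulated a word at a time so
   that any carry out of bit 63 can be collected explicitly.  */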
13131
13132 static uint64_t
13133 mul64hi (uint64_t value1, uint64_t value2)
13134 {
13135 uint64_t resultmid1;
13136 uint64_t result;
13137 uint64_t value1_lo = lowWordToU64 (value1);
13138 uint64_t value1_hi = highWordToU64 (value1);
13139 uint64_t value2_lo = lowWordToU64 (value2);
13140 uint64_t value2_hi = highWordToU64 (value2);
13141
13142 /* Cross-multiply and collect results. */
13143 uint64_t xproductlo = value1_lo * value2_lo;
13144 uint64_t xproductmid1 = value1_lo * value2_hi;
13145 uint64_t xproductmid2 = value1_hi * value2_lo;
13146 uint64_t xproducthi = value1_hi * value2_hi;
13147 uint64_t carry = 0;
13148 /* Start accumulating 64 bit results. */
13149 /* Drop bottom half of lowest cross-product. */
13150 uint64_t resultmid = xproductlo >> 32;
13151 /* Add in middle products. */
13152 resultmid = resultmid + xproductmid1;
13153
13154 /* Check for overflow. */
13155 if (resultmid < xproductmid1)
13156 /* Carry over 1 into top cross-product. */
13157 carry++;
13158
13159 resultmid1 = resultmid + xproductmid2;
13160
13161 /* Check for overflow. */
13162 if (resultmid1 < xproductmid2)
13163 /* Carry over 1 into top cross-product. */
13164 carry++;
13165
13166 /* Drop lowest 32 bits of middle cross-product. */
13167 result = resultmid1 >> 32;
13168 /* Move carry bit to just above middle cross-product highest bit. */
13169 carry = carry << 32;
13170
13171 /* Add in the top cross-product and any carry.  */
13172 result += xproducthi + carry;
13173
13174 return result;
13175 }
13176
13177 /* Signed multiply high, source, source2 :
13178 64 bit, dest <-- high 64-bit of result. */
13179 static void
13180 smulh (sim_cpu *cpu)
13181 {
13182 uint64_t uresult;
13183 int64_t result;
13184 unsigned rm = INSTR (20, 16);
13185 unsigned rn = INSTR (9, 5);
13186 unsigned rd = INSTR (4, 0);
13187 GReg ra = INSTR (14, 10);
13188 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13189 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13190 uint64_t uvalue1;
13191 uint64_t uvalue2;
13192 int negate = 0;
13193
13194 if (ra != R31)
13195 HALT_UNALLOC;
13196
13197 /* Convert to unsigned and use the unsigned mul64hi routine,
13198 then fix the sign up afterwards.  */
13199 if (value1 < 0)
13200 {
13201 negate = !negate;
13202 uvalue1 = -value1;
13203 }
13204 else
13205 {
13206 uvalue1 = value1;
13207 }
13208
13209 if (value2 < 0)
13210 {
13211 negate = !negate;
13212 uvalue2 = -value2;
13213 }
13214 else
13215 {
13216 uvalue2 = value2;
13217 }
13218
13219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13220
13221 uresult = mul64hi (uvalue1, uvalue2);
13222 result = uresult;
13223
13224 if (negate)
13225 {
13226 /* Multiply 128-bit result by -1, which means highpart gets inverted,
13227 and has carry in added only if low part is 0. */
13228 result = ~result;
13229 if ((uvalue1 * uvalue2) == 0)
13230 result += 1;
13231 }
13232
13233 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13234 }
13235
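/* A quick sanity example for the fix-up above: value1 = -1,
   value2 = 2 gives mul64hi (1, 2) == 0 with a non-zero low part, so
   negation yields ~0 == -1, the correct high half of the 128 bit
   product -2.  */
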
13236 /* Unsigned multiply add long -- source, source2 :
13237 32 bit, source3 : 64 bit. */
13238 static void
13239 umaddl (sim_cpu *cpu)
13240 {
13241 unsigned rm = INSTR (20, 16);
13242 unsigned ra = INSTR (14, 10);
13243 unsigned rn = INSTR (9, 5);
13244 unsigned rd = INSTR (4, 0);
13245
13246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13247 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13248 obtain a 64 bit product. */
13249 aarch64_set_reg_u64
13250 (cpu, rd, NO_SP,
13251 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13252 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13253 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13254 }
13255
13256 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13257 static void
13258 umsubl (sim_cpu *cpu)
13259 {
13260 unsigned rm = INSTR (20, 16);
13261 unsigned ra = INSTR (14, 10);
13262 unsigned rn = INSTR (9, 5);
13263 unsigned rd = INSTR (4, 0);
13264
13265 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13266 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13267 obtain a 64 bit product. */
13268 aarch64_set_reg_u64
13269 (cpu, rd, NO_SP,
13270 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13271 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13272 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13273 }
13274
13275 /* Unsigned multiply high, source, source2 :
13276 64 bit, dest <-- high 64-bit of result. */
13277 static void
13278 umulh (sim_cpu *cpu)
13279 {
13280 unsigned rm = INSTR (20, 16);
13281 unsigned rn = INSTR (9, 5);
13282 unsigned rd = INSTR (4, 0);
13283 GReg ra = INSTR (14, 10);
13284
13285 if (ra != R31)
13286 HALT_UNALLOC;
13287
13288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13289 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13290 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13291 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13292 }
13293
13294 static void
13295 dexDataProc3Source (sim_cpu *cpu)
13296 {
13297 /* assert instr[28,24] == 11011. */
13298 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13299 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13300 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13301 instr[15] = o0 : 0/1 ==> ok
13302 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13303 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13304 0100 ==> SMULH, (64 bit only)
13305 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13306 1100 ==> UMULH (64 bit only)
13307 ow ==> UNALLOC. */
13308
13309 uint32_t dispatch;
13310 uint32_t size = INSTR (31, 31);
13311 uint32_t op54 = INSTR (30, 29);
13312 uint32_t op31 = INSTR (23, 21);
13313 uint32_t o0 = INSTR (15, 15);
13314
13315 if (op54 != 0)
13316 HALT_UNALLOC;
13317
13318 if (size == 0)
13319 {
13320 if (op31 != 0)
13321 HALT_UNALLOC;
13322
13323 if (o0 == 0)
13324 madd32 (cpu);
13325 else
13326 msub32 (cpu);
13327 return;
13328 }
13329
13330 dispatch = (op31 << 1) | o0;
13331
13332 switch (dispatch)
13333 {
13334 case 0: madd64 (cpu); return;
13335 case 1: msub64 (cpu); return;
13336 case 2: smaddl (cpu); return;
13337 case 3: smsubl (cpu); return;
13338 case 4: smulh (cpu); return;
13339 case 10: umaddl (cpu); return;
13340 case 11: umsubl (cpu); return;
13341 case 12: umulh (cpu); return;
13342 default: HALT_UNALLOC;
13343 }
13344 }
13345
13346 static void
13347 dexDPReg (sim_cpu *cpu)
13348 {
13349 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13350 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13351 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13352 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13353
13354 switch (group2)
13355 {
13356 case DPREG_LOG_000:
13357 case DPREG_LOG_001:
13358 dexLogicalShiftedRegister (cpu); return;
13359
13360 case DPREG_ADDSHF_010:
13361 dexAddSubtractShiftedRegister (cpu); return;
13362
13363 case DPREG_ADDEXT_011:
13364 dexAddSubtractExtendedRegister (cpu); return;
13365
13366 case DPREG_ADDCOND_100:
13367 {
13368 /* This set bundles a variety of different operations. */
13369 /* Check for :  */
13370 /* 1) add/sub w carry. */
13371 uint32_t mask1 = 0x1FE00000U;
13372 uint32_t val1 = 0x1A000000U;
13373 /* 2) cond compare register/immediate. */
13374 uint32_t mask2 = 0x1FE00000U;
13375 uint32_t val2 = 0x1A400000U;
13376 /* 3) cond select. */
13377 uint32_t mask3 = 0x1FE00000U;
13378 uint32_t val3 = 0x1A800000U;
13379 /* 4) data proc 1/2 source. */
13380 uint32_t mask4 = 0x1FE00000U;
13381 uint32_t val4 = 0x1AC00000U;
13382
13383 if ((aarch64_get_instr (cpu) & mask1) == val1)
13384 dexAddSubtractWithCarry (cpu);
13385
13386 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13387 CondCompare (cpu);
13388
13389 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13390 dexCondSelect (cpu);
13391
13392 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13393 {
13394 /* Bit 30 is clear for data proc 2 source
13395 and set for data proc 1 source. */
13396 if (aarch64_get_instr (cpu) & (1U << 30))
13397 dexDataProc1Source (cpu);
13398 else
13399 dexDataProc2Source (cpu);
13400 }
13401
13402 else
13403 /* Should not reach here. */
13404 HALT_NYI;
13405
13406 return;
13407 }
13408
13409 case DPREG_3SRC_110:
13410 dexDataProc3Source (cpu); return;
13411
13412 case DPREG_UNALLOC_101:
13413 HALT_UNALLOC;
13414
13415 case DPREG_3SRC_111:
13416 dexDataProc3Source (cpu); return;
13417
13418 default:
13419 /* Should never reach here. */
13420 HALT_NYI;
13421 }
13422 }
13423
13424 /* Unconditional Branch immediate.
13425 Offset is a PC-relative byte offset in the range +/- 128MiB.
13426 The offset arrives here already scaled, i.e. the decode
13427 routine converts the instruction's word offset to a byte offset.  */
13428
13429 /* Unconditional branch. */
13430 static void
13431 buc (sim_cpu *cpu, int32_t offset)
13432 {
13433 aarch64_set_next_PC_by_offset (cpu, offset);
13434 }
13435
13436 static unsigned stack_depth = 0;
13437
13438 /* Unconditional branch and link -- writes return PC to LR. */
13439 static void
13440 bl (sim_cpu *cpu, int32_t offset)
13441 {
13442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13443 aarch64_save_LR (cpu);
13444 aarch64_set_next_PC_by_offset (cpu, offset);
13445
13446 if (TRACE_BRANCH_P (cpu))
13447 {
13448 ++ stack_depth;
13449 TRACE_BRANCH (cpu,
13450 " %*scall %" PRIx64 " [%s]"
13451 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13452 stack_depth, " ", aarch64_get_next_PC (cpu),
13453 aarch64_get_func (CPU_STATE (cpu),
13454 aarch64_get_next_PC (cpu)),
13455 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13456 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13457 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13458 );
13459 }
13460 }
13461
13462 /* Unconditional Branch register.
13463 Branch/return address is in source register. */
13464
13465 /* Unconditional branch. */
13466 static void
13467 br (sim_cpu *cpu)
13468 {
13469 unsigned rn = INSTR (9, 5);
13470 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13471 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13472 }
13473
13474 /* Unconditional branch and link -- writes return PC to LR. */
13475 static void
13476 blr (sim_cpu *cpu)
13477 {
13478 unsigned rn = INSTR (9, 5);
13479
13480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13481 /* The pseudo code in the spec says we update LR before fetching
13482 the value from rn.  */
13483 aarch64_save_LR (cpu);
13484 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13485
13486 if (TRACE_BRANCH_P (cpu))
13487 {
13488 ++ stack_depth;
13489 TRACE_BRANCH (cpu,
13490 " %*scall %" PRIx64 " [%s]"
13491 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13492 stack_depth, " ", aarch64_get_next_PC (cpu),
13493 aarch64_get_func (CPU_STATE (cpu),
13494 aarch64_get_next_PC (cpu)),
13495 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13496 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13497 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13498 );
13499 }
13500 }
13501
13502 /* Return -- the assembler will default the source to LR.  This is
13503 functionally equivalent to br but, presumably, unlike br it
13504 side-effects the branch predictor.  */
13505 static void
13506 ret (sim_cpu *cpu)
13507 {
13508 unsigned rn = INSTR (9, 5);
13509 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13510
13511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13512 if (TRACE_BRANCH_P (cpu))
13513 {
13514 TRACE_BRANCH (cpu,
13515 " %*sreturn [result: %" PRIx64 "]",
13516 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13517 -- stack_depth;
13518 }
13519 }
13520
13521 /* NOP -- we implement this and call it from the decode in case we
13522 want to intercept it later. */
13523
13524 static void
13525 nop (sim_cpu *cpu)
13526 {
13527 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13528 }
13529
13530 /* Data synchronization barrier. */
13531
13532 static void
13533 dsb (sim_cpu *cpu)
13534 {
13535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13536 }
13537
13538 /* Data memory barrier. */
13539
13540 static void
13541 dmb (sim_cpu *cpu)
13542 {
13543 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13544 }
13545
13546 /* Instruction synchronization barrier. */
13547
13548 static void
13549 isb (sim_cpu *cpu)
13550 {
13551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13552 }
13553
13554 static void
13555 dexBranchImmediate (sim_cpu *cpu)
13556 {
13557 /* assert instr[30,26] == 00101
13558 instr[31] ==> 0 == B, 1 == BL
13559 instr[25,0] == imm26 branch offset counted in words. */
13560
13561 uint32_t top = INSTR (31, 31);
13562 /* We have a 26 bit signed word offset which we need to pass to the
13563 execute routine as a signed byte offset. */
13564 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13565
13566 if (top)
13567 bl (cpu, offset);
13568 else
13569 buc (cpu, offset);
13570 }
13571
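/* For example B .+8 encodes imm26 = 2: sign extension plus the << 2
   above turn that word offset into a byte offset of 8, while an
   imm26 of all ones becomes -4, i.e. a branch to the previous
   instruction.  */
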
13572 /* Control Flow. */
13573
13574 /* Conditional branch.
13575
13576 Offset is a PC-relative byte offset in the range +/- 1MiB.  Pos is
13577 a bit position in the range 0 .. 63.
13578
13579 cc is a CondCode enum value as pulled out of the decode.
13580
13581 N.B. any offset register (source) can only be Xn or Wn.  */
13582
13583 static void
13584 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13585 {
13586 /* The test returns TRUE if CC is met. */
13587 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13588 if (testConditionCode (cpu, cc))
13589 aarch64_set_next_PC_by_offset (cpu, offset);
13590 }
13591
13592 /* 32 bit branch on register non-zero. */
13593 static void
13594 cbnz32 (sim_cpu *cpu, int32_t offset)
13595 {
13596 unsigned rt = INSTR (4, 0);
13597
13598 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13599 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13600 aarch64_set_next_PC_by_offset (cpu, offset);
13601 }
13602
13603 /* 64 bit branch on register non-zero.  */
13604 static void
13605 cbnz (sim_cpu *cpu, int32_t offset)
13606 {
13607 unsigned rt = INSTR (4, 0);
13608
13609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13610 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13611 aarch64_set_next_PC_by_offset (cpu, offset);
13612 }
13613
13614 /* 32 bit branch on register zero.  */
13615 static void
13616 cbz32 (sim_cpu *cpu, int32_t offset)
13617 {
13618 unsigned rt = INSTR (4, 0);
13619
13620 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13621 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13622 aarch64_set_next_PC_by_offset (cpu, offset);
13623 }
13624
13625 /* 64 bit branch on register zero. */
13626 static void
13627 cbz (sim_cpu *cpu, int32_t offset)
13628 {
13629 unsigned rt = INSTR (4, 0);
13630
13631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13632 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13633 aarch64_set_next_PC_by_offset (cpu, offset);
13634 }
13635
13636 /* Branch on register bit test non-zero -- one size fits all. */
13637 static void
13638 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13639 {
13640 unsigned rt = INSTR (4, 0);
13641
13642 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13643 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13644 aarch64_set_next_PC_by_offset (cpu, offset);
13645 }
13646
13647 /* Branch on register bit test zero -- one size fits all. */
13648 static void
13649 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13650 {
13651 unsigned rt = INSTR (4, 0);
13652
13653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13654 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13655 aarch64_set_next_PC_by_offset (cpu, offset);
13656 }
13657
13658 static void
13659 dexCompareBranchImmediate (sim_cpu *cpu)
13660 {
13661 /* instr[30,25] = 01 1010
13662 instr[31] = size : 0 ==> 32, 1 ==> 64
13663 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13664 instr[23,5] = simm19 branch offset counted in words
13665 instr[4,0] = rt */
13666
13667 uint32_t size = INSTR (31, 31);
13668 uint32_t op = INSTR (24, 24);
13669 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13670
13671 if (size == 0)
13672 {
13673 if (op == 0)
13674 cbz32 (cpu, offset);
13675 else
13676 cbnz32 (cpu, offset);
13677 }
13678 else
13679 {
13680 if (op == 0)
13681 cbz (cpu, offset);
13682 else
13683 cbnz (cpu, offset);
13684 }
13685 }
13686
13687 static void
13688 dexTestBranchImmediate (sim_cpu *cpu)
13689 {
13690 /* instr[31] = b5 : bit 5 of test bit idx
13691 instr[30,25] = 01 1011
13692 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13693 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13694 instr[18,5] = simm14 : signed offset counted in words
13695 instr[4,0] = uimm5 */
13696
13697 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13698 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13699
13700 NYI_assert (30, 25, 0x1b);
13701
13702 if (INSTR (24, 24) == 0)
13703 tbz (cpu, pos, offset);
13704 else
13705 tbnz (cpu, pos, offset);
13706 }
13707
13708 static void
13709 dexCondBranchImmediate (sim_cpu *cpu)
13710 {
13711 /* instr[31,25] = 010 1010
13712 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13713 instr[23,5] = simm19 : signed offset counted in words
13714 instr[4] = op0
13715 instr[3,0] = cond */
13716
13717 int32_t offset;
13718 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13719
13720 NYI_assert (31, 25, 0x2a);
13721
13722 if (op != 0)
13723 HALT_UNALLOC;
13724
13725 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13726
13727 bcc (cpu, offset, INSTR (3, 0));
13728 }
13729
13730 static void
13731 dexBranchRegister (sim_cpu *cpu)
13732 {
13733 /* instr[31,25] = 110 1011
13734 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13735 instr[20,16] = op2 : must be 11111
13736 instr[15,10] = op3 : must be 000000
13737 instr[4,0] = op4 : must be 00000.  */
13738
13739 uint32_t op = INSTR (24, 21);
13740 uint32_t op2 = INSTR (20, 16);
13741 uint32_t op3 = INSTR (15, 10);
13742 uint32_t op4 = INSTR (4, 0);
13743
13744 NYI_assert (31, 25, 0x6b);
13745
13746 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13747 HALT_UNALLOC;
13748
13749 if (op == 0)
13750 br (cpu);
13751
13752 else if (op == 1)
13753 blr (cpu);
13754
13755 else if (op == 2)
13756 ret (cpu);
13757
13758 else
13759 {
13760 /* ERET and DRPS accept 0b11111 for rn = instr [4,0];
13761 anything else is unallocated.  */
13762 uint32_t rn = INSTR (4, 0);
13763
13764 if (rn != 0x1f)
13765 HALT_UNALLOC;
13766
13767 if (op == 4 || op == 5)
13768 HALT_NYI;
13769
13770 HALT_UNALLOC;
13771 }
13772 }
13773
13774 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13775 but this may not be available. So instead we define the values we need
13776 here. */
13777 #define AngelSVC_Reason_Open 0x01
13778 #define AngelSVC_Reason_Close 0x02
13779 #define AngelSVC_Reason_Write 0x05
13780 #define AngelSVC_Reason_Read 0x06
13781 #define AngelSVC_Reason_IsTTY 0x09
13782 #define AngelSVC_Reason_Seek 0x0A
13783 #define AngelSVC_Reason_FLen 0x0C
13784 #define AngelSVC_Reason_Remove 0x0E
13785 #define AngelSVC_Reason_Rename 0x0F
13786 #define AngelSVC_Reason_Clock 0x10
13787 #define AngelSVC_Reason_Time 0x11
13788 #define AngelSVC_Reason_System 0x12
13789 #define AngelSVC_Reason_Errno 0x13
13790 #define AngelSVC_Reason_GetCmdLine 0x15
13791 #define AngelSVC_Reason_HeapInfo 0x16
13792 #define AngelSVC_Reason_ReportException 0x18
13793 #define AngelSVC_Reason_Elapsed 0x30
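
/* In the Angel (semihosting) convention handled below, W0 holds the
   reason code and X1 points at a parameter block in target memory.
   A Write request, for example, supplies three 64 bit words -- file
   handle, buffer address and byte count -- which is exactly the
   layout handle_halt reads back with aarch64_get_mem_u64.  */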
13794
13795
13796 static void
13797 handle_halt (sim_cpu *cpu, uint32_t val)
13798 {
13799 uint64_t result = 0;
13800
13801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13802 if (val != 0xf000)
13803 {
13804 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13805 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13806 sim_stopped, SIM_SIGTRAP);
13807 }
13808
13809 /* We have encountered an Angel SVC call. See if we can process it. */
13810 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13811 {
13812 case AngelSVC_Reason_HeapInfo:
13813 {
13814 /* Get the values. */
13815 uint64_t stack_top = aarch64_get_stack_start (cpu);
13816 uint64_t heap_base = aarch64_get_heap_start (cpu);
13817
13818 /* Get the pointer */
13819 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13820 ptr = aarch64_get_mem_u64 (cpu, ptr);
13821
13822 /* Fill in the memory block. */
13823 /* Start addr of heap. */
13824 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13825 /* End addr of heap. */
13826 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13827 /* Lowest stack addr. */
13828 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13829 /* Initial stack addr. */
13830 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13831
13832 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13833 }
13834 break;
13835
13836 case AngelSVC_Reason_Open:
13837 {
13838 /* Get the pointer */
13839 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */
13840 /* FIXME: For now we just assume that we will only be asked
13841 to open the standard file descriptors. */
13842 static int fd = 0;
13843 result = fd ++;
13844
13845 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13846 }
13847 break;
13848
13849 case AngelSVC_Reason_Close:
13850 {
13851 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13852 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13853 result = 0;
13854 }
13855 break;
13856
13857 case AngelSVC_Reason_Errno:
13858 result = 0;
13859 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13860 break;
13861
13862 case AngelSVC_Reason_Clock:
13863 result =
13864 #ifdef CLOCKS_PER_SEC
13865 (CLOCKS_PER_SEC >= 100)
13866 ? (clock () / (CLOCKS_PER_SEC / 100))
13867 : ((clock () * 100) / CLOCKS_PER_SEC)
13868 #else
13869 /* Presume unix... clock() returns microseconds. */
13870 (clock () / 10000)
13871 #endif
13872 ;
13873 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13874 break;
13875
13876 case AngelSVC_Reason_GetCmdLine:
13877 {
13878 /* Get the pointer */
13879 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13880 ptr = aarch64_get_mem_u64 (cpu, ptr);
13881
13882 /* FIXME: No command line for now. */
13883 aarch64_set_mem_u64 (cpu, ptr, 0);
13884 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13885 }
13886 break;
13887
13888 case AngelSVC_Reason_IsTTY:
13889 result = 1;
13890 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13891 break;
13892
13893 case AngelSVC_Reason_Write:
13894 {
13895 /* Get the pointer */
13896 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13897 /* Get the write control block. */
13898 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13899 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13900 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13901
13902 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13903 PRIx64 " on descriptor %" PRIx64,
13904 len, buf, fd);
13905
13906 if (len > 1280)
13907 {
13908 TRACE_SYSCALL (cpu,
13909 " AngelSVC: Write: Suspiciously long write: %ld",
13910 (long) len);
13911 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13912 sim_stopped, SIM_SIGBUS);
13913 }
13914 else if (fd == 1)
13915 {
13916 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13917 }
13918 else if (fd == 2)
13919 {
13920 TRACE (cpu, 0, "\n");
13921 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13922 (int) len, aarch64_get_mem_ptr (cpu, buf));
13923 TRACE (cpu, 0, "\n");
13924 }
13925 else
13926 {
13927 TRACE_SYSCALL (cpu,
13928 " AngelSVC: Write: Unexpected file handle: %d",
13929 (int) fd);
13930 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13931 sim_stopped, SIM_SIGABRT);
13932 }
13933 }
13934 break;
13935
13936 case AngelSVC_Reason_ReportException:
13937 {
13938 /* Get the pointer */
13939 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13940 /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */
13941 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13942 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13943
13944 TRACE_SYSCALL (cpu,
13945 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13946 type, state);
13947
13948 if (type == 0x20026)
13949 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13950 sim_exited, state);
13951 else
13952 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13953 sim_stopped, SIM_SIGINT);
13954 }
13955 break;
13956
13957 case AngelSVC_Reason_Read:
13958 case AngelSVC_Reason_FLen:
13959 case AngelSVC_Reason_Seek:
13960 case AngelSVC_Reason_Remove:
13961 case AngelSVC_Reason_Time:
13962 case AngelSVC_Reason_System:
13963 case AngelSVC_Reason_Rename:
13964 case AngelSVC_Reason_Elapsed:
13965 default:
13966 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13967 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13968 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13969 sim_stopped, SIM_SIGTRAP);
13970 }
13971
13972 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13973 }
13974
13975 static void
13976 dexExcpnGen (sim_cpu *cpu)
13977 {
13978 /* instr[31:24] = 11010100
13979 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13980 010 ==> HLT, 101 ==> DBG GEN EXCPN
13981 instr[20,5] = imm16
13982 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13983 instr[1,0] = LL : discriminates opc */
13984
13985 uint32_t opc = INSTR (23, 21);
13986 uint32_t imm16 = INSTR (20, 5);
13987 uint32_t opc2 = INSTR (4, 2);
13988 uint32_t LL;
13989
13990 NYI_assert (31, 24, 0xd4);
13991
13992 if (opc2 != 0)
13993 HALT_UNALLOC;
13994
13995 LL = INSTR (1, 0);
13996
13997 /* We only implement HLT and BRK for now. */
13998 if (opc == 1 && LL == 0)
13999 {
14000 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
14001 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
14002 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
14003 }
14004
14005 if (opc == 2 && LL == 0)
14006 handle_halt (cpu, imm16);
14007
14008 else if (opc == 0 || opc == 5)
14009 HALT_NYI;
14010
14011 else
14012 HALT_UNALLOC;
14013 }
14014
14015 /* Stub for accessing system registers. */
14016
14017 static uint64_t
14018 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14019 unsigned crm, unsigned op2)
14020 {
14021 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
14022 /* DCZID_EL0 - the Data Cache Zero ID register.
14023 We do not support DC ZVA at the moment, so
14024 we return a value with the disable bit set.
14025 We implement support for the DCZID register since
14026 it is used by the C library's memset function. */
14027 return ((uint64_t) 1) << 4;
14028
14029 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
14030 /* Cache Type Register. */
14031 return 0x80008000UL;
14032
14033 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
14034 /* TPIDR_EL0 - thread pointer id. */
14035 return aarch64_get_thread_id (cpu);
14036
14037 if (op1 == 3 && crm == 4 && op2 == 0)
14038 return aarch64_get_FPCR (cpu);
14039
14040 if (op1 == 3 && crm == 4 && op2 == 1)
14041 return aarch64_get_FPSR (cpu);
14042
14043 else if (op1 == 3 && crm == 2 && op2 == 0)
14044 return aarch64_get_CPSR (cpu);
14045
14046 HALT_NYI;
14047 }
14048
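/* As an illustrative mapping (standard architectural operand values,
   not anything new to this file): the MRS for DCZID_EL0 used by the
   C library's memset carries op0 = 3, op1 = 3, CRn = 0, CRm = 0,
   op2 = 7, which selects the first case accepted above.  */
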
14049 static void
14050 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14051 unsigned crm, unsigned op2, uint64_t val)
14052 {
14053 if (op1 == 3 && crm == 4 && op2 == 0)
14054 aarch64_set_FPCR (cpu, val);
14055
14056 else if (op1 == 3 && crm == 4 && op2 == 1)
14057 aarch64_set_FPSR (cpu, val);
14058
14059 else if (op1 == 3 && crm == 2 && op2 == 0)
14060 aarch64_set_CPSR (cpu, val);
14061
14062 else
14063 HALT_NYI;
14064 }
14065
14066 static void
14067 do_mrs (sim_cpu *cpu)
14068 {
14069 /* instr[31:20] = 1101 0101 0011
14070 instr[19] = op0
14071 instr[18,16] = op1
14072 instr[15,12] = CRn
14073 instr[11,8] = CRm
14074 instr[7,5] = op2
14075 instr[4,0] = Rt */
14076 unsigned sys_op0 = INSTR (19, 19) + 2;
14077 unsigned sys_op1 = INSTR (18, 16);
14078 unsigned sys_crn = INSTR (15, 12);
14079 unsigned sys_crm = INSTR (11, 8);
14080 unsigned sys_op2 = INSTR (7, 5);
14081 unsigned rt = INSTR (4, 0);
14082
14083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14084 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14085 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14086 }
14087
14088 static void
14089 do_MSR_immediate (sim_cpu *cpu)
14090 {
14091 /* instr[31:19] = 1101 0101 0000 0
14092 instr[18,16] = op1
14093 instr[15,12] = 0100
14094 instr[11,8] = CRm
14095 instr[7,5] = op2
14096 instr[4,0] = 1 1111 */
14097
14098 unsigned op1 = INSTR (18, 16);
14099 /*unsigned crm = INSTR (11, 8);*/
14100 unsigned op2 = INSTR (7, 5);
14101
14102 NYI_assert (31, 19, 0x1AA0);
14103 NYI_assert (15, 12, 0x4);
14104 NYI_assert (4, 0, 0x1F);
14105
14106 if (op1 == 0)
14107 {
14108 if (op2 == 5)
14109 HALT_NYI; /* set SPSel. */
14110 else
14111 HALT_UNALLOC;
14112 }
14113 else if (op1 == 3)
14114 {
14115 if (op2 == 6)
14116 HALT_NYI; /* set DAIFset. */
14117 else if (op2 == 7)
14118 HALT_NYI; /* set DAIFclr. */
14119 else
14120 HALT_UNALLOC;
14121 }
14122 else
14123 HALT_UNALLOC;
14124 }
14125
14126 static void
14127 do_MSR_reg (sim_cpu *cpu)
14128 {
14129 /* instr[31:20] = 1101 0101 0001
14130 instr[19] = op0
14131 instr[18,16] = op1
14132 instr[15,12] = CRn
14133 instr[11,8] = CRm
14134 instr[7,5] = op2
14135 instr[4,0] = Rt */
14136
14137 unsigned sys_op0 = INSTR (19, 19) + 2;
14138 unsigned sys_op1 = INSTR (18, 16);
14139 unsigned sys_crn = INSTR (15, 12);
14140 unsigned sys_crm = INSTR (11, 8);
14141 unsigned sys_op2 = INSTR (7, 5);
14142 unsigned rt = INSTR (4, 0);
14143
14144 NYI_assert (31, 20, 0xD51);
14145
14146 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14147 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14148 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14149 }
14150
14151 static void
14152 do_SYS (sim_cpu *cpu)
14153 {
14154 /* instr[31,19] = 1101 0101 0000 1
14155 instr[18,16] = op1
14156 instr[15,12] = CRn
14157 instr[11,8] = CRm
14158 instr[7,5] = op2
14159 instr[4,0] = Rt */
14160 NYI_assert (31, 19, 0x1AA1);
14161
14162 /* FIXME: For now we just silently accept system ops. */
14163 }
14164
14165 static void
14166 dexSystem (sim_cpu *cpu)
14167 {
14168 /* instr[31:22] = 1101 01010 0
14169 instr[21] = L
14170 instr[20,19] = op0
14171 instr[18,16] = op1
14172 instr[15,12] = CRn
14173 instr[11,8] = CRm
14174 instr[7,5] = op2
14175 instr[4,0] = uimm5 */
14176
14177 /* We are interested in HINT, DSB, DMB and ISB
14178
14179 Hint #0 encodes NOOP (this is the only hint we care about);
14180 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14181 CRm:op2 == 0000 000.  Hints with CRm != 0000 or op2 > 101 are also treated as NOPs.
14182
14183 DSB, DMB, ISB are data synchronization barrier, data memory
14184 barrier and instruction synchronization barrier, respectively, where
14185
14186 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14187 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14188 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14189 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14190 10 ==> InnerShareable, 11 ==> FullSystem
14191 types : 01 ==> Reads, 10 ==> Writes,
14192 11 ==> All, 00 ==> All (domain == FullSystem). */
14193
14194 unsigned rt = INSTR (4, 0);
14195
14196 NYI_assert (31, 22, 0x354);
14197
14198 switch (INSTR (21, 12))
14199 {
14200 case 0x032:
14201 if (rt == 0x1F)
14202 {
14203 /* NOP has CRm != 0000 OR
14204 (CRm == 0000 AND (op2 == 000 OR op2 > 101)).  */
14205 uint32_t crm = INSTR (11, 8);
14206 uint32_t op2 = INSTR (7, 5);
14207
14208 if (crm != 0 || (op2 == 0 || op2 > 5))
14209 {
14210 /* Actually call nop method so we can reimplement it later. */
14211 nop (cpu);
14212 return;
14213 }
14214 }
14215 HALT_NYI;
14216
14217 case 0x033:
14218 {
14219 uint32_t op2 = INSTR (7, 5);
14220
14221 switch (op2)
14222 {
14223 case 2: HALT_NYI;
14224 case 4: dsb (cpu); return;
14225 case 5: dmb (cpu); return;
14226 case 6: isb (cpu); return;
14227 default: HALT_UNALLOC;
14228 }
14229 }
14230
14231 case 0x3B0:
14232 case 0x3B4:
14233 case 0x3BD:
14234 do_mrs (cpu);
14235 return;
14236
14237 case 0x0B7:
14238 do_SYS (cpu); /* DC is an alias of SYS. */
14239 return;
14240
14241 default:
14242 if (INSTR (21, 20) == 0x1)
14243 do_MSR_reg (cpu);
14244 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14245 do_MSR_immediate (cpu);
14246 else
14247 HALT_NYI;
14248 return;
14249 }
14250 }
14251
14252 static void
14253 dexBr (sim_cpu *cpu)
14254 {
14255 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14256 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14257 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14258 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14259
14260 switch (group2)
14261 {
14262 case BR_IMM_000:
14263 return dexBranchImmediate (cpu);
14264
14265 case BR_IMMCMP_001:
14266 /* Compare has bit 25 clear while test has it set. */
14267 if (!INSTR (25, 25))
14268 dexCompareBranchImmediate (cpu);
14269 else
14270 dexTestBranchImmediate (cpu);
14271 return;
14272
14273 case BR_IMMCOND_010:
14274 /* This is a conditional branch if bit 25 is clear otherwise
14275 unallocated. */
14276 if (!INSTR (25, 25))
14277 dexCondBranchImmediate (cpu);
14278 else
14279 HALT_UNALLOC;
14280 return;
14281
14282 case BR_UNALLOC_011:
14283 HALT_UNALLOC;
14284
14285 case BR_IMM_100:
14286 dexBranchImmediate (cpu);
14287 return;
14288
14289 case BR_IMMCMP_101:
14290 /* Compare has bit 25 clear while test has it set. */
14291 if (!INSTR (25, 25))
14292 dexCompareBranchImmediate (cpu);
14293 else
14294 dexTestBranchImmediate (cpu);
14295 return;
14296
14297 case BR_REG_110:
14298 /* Unconditional branch reg has bit 25 set. */
14299 if (INSTR (25, 25))
14300 dexBranchRegister (cpu);
14301
14302 /* This includes both Excpn Gen, System and unalloc operations.
14303 We need to decode the Excpn Gen operation BRK so we can plant
14304 debugger entry points.
14305 Excpn Gen operations have instr [24] = 0.
14306 we need to decode at least one of the System operations NOP
14307 which is an alias for HINT #0.
14308 System operations have instr [24,22] = 100. */
14309 else if (INSTR (24, 24) == 0)
14310 dexExcpnGen (cpu);
14311
14312 else if (INSTR (24, 22) == 4)
14313 dexSystem (cpu);
14314
14315 else
14316 HALT_UNALLOC;
14317
14318 return;
14319
14320 case BR_UNALLOC_111:
14321 HALT_UNALLOC;
14322
14323 default:
14324 /* Should never reach here. */
14325 HALT_NYI;
14326 }
14327 }
14328
14329 static void
14330 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14331 {
14332 /* We need to check if gdb wants to break in here.  */
14333 /* checkBreak (cpu);. */
14334
14335 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14336
14337 switch (group)
14338 {
14339 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14340 case GROUP_LDST_0100: dexLdSt (cpu); break;
14341 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14342 case GROUP_LDST_0110: dexLdSt (cpu); break;
14343 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14344 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14345 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14346 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14347 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14348 case GROUP_LDST_1100: dexLdSt (cpu); break;
14349 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14350 case GROUP_LDST_1110: dexLdSt (cpu); break;
14351 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14352
14353 case GROUP_UNALLOC_0001:
14354 case GROUP_UNALLOC_0010:
14355 case GROUP_UNALLOC_0011:
14356 HALT_UNALLOC;
14357
14358 default:
14359 /* Should never reach here. */
14360 HALT_NYI;
14361 }
14362 }
14363
14364 static bfd_boolean
14365 aarch64_step (sim_cpu *cpu)
14366 {
14367 uint64_t pc = aarch64_get_PC (cpu);
14368
14369 if (pc == TOP_LEVEL_RETURN_PC)
14370 return FALSE;
14371
14372 aarch64_set_next_PC (cpu, pc + 4);
14373
14374 /* Code is always little-endian. */
14375 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14376 & aarch64_get_instr (cpu), pc, 4);
14377 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14378
14379 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14380 aarch64_get_instr (cpu));
14381 TRACE_DISASM (cpu, pc);
14382
14383 aarch64_decode_and_execute (cpu, pc);
14384
14385 return TRUE;
14386 }
14387
14388 void
14389 aarch64_run (SIM_DESC sd)
14390 {
14391 sim_cpu *cpu = STATE_CPU (sd, 0);
14392
14393 while (aarch64_step (cpu))
14394 {
14395 aarch64_update_PC (cpu);
14396
14397 if (sim_events_tick (sd))
14398 sim_events_process (sd);
14399 }
14400
14401 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14402 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14403 }
14404
14405 void
14406 aarch64_init (sim_cpu *cpu, uint64_t pc)
14407 {
14408 uint64_t sp = aarch64_get_stack_start (cpu);
14409
14410 /* Install SP, FP and PC and set LR to -20
14411 so we can detect a top-level return. */
14412 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14413 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14414 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14415 aarch64_set_next_PC (cpu, pc);
14416 aarch64_update_PC (cpu);
14417 aarch64_init_LIT_table ();
14418 }