2002-06-12 Chris Demetriou <cgd@broadcom.com>
[deliverable/binutils-gdb.git] / sim / mips / mdmx.c
1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Broadcom Corporation (SiByte).
4
5 This file is part of GDB, the GNU debugger.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 #include <stdio.h>
22
23 #include "sim-main.h"
24
25 /* Within mdmx.c we refer to the sim_cpu directly. */
26 #define CPU cpu
27 #define SD (CPU_STATE(CPU))
28 #define SD_ cpu, cia, -1
29
30 /* MDMX Representations
31
32 An 8-bit packed byte element (OB) is always unsigned.
33 The 24-bit accumulators are signed and are represented as 32-bit
34 signed values, which are reduced to 24-bit signed values prior to
35 Round and Clamp operations.
36
37 A 16-bit packed halfword element (QH) is always signed.
38 The 48-bit accumulators are signed and are represented as 64-bit
39 signed values, which are reduced to 48-bit signed values prior to
40 Round and Clamp operations.
41
42 The code below assumes a 2's-complement representation of signed
43 quantities. Care is required to clear extended sign bits when
44 repacking fields.
45
46 The code (and the code for arithmetic shifts in mips.igen) also makes
47 the (not guaranteed portable) assumption that right shifts of signed
48 quantities in C do sign extension. */
49
/* 48-bit quantities are carried in 64-bit containers; MASK48 selects
   the low 48 bits of such a container.  */
typedef unsigned64 unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

/* 24-bit quantities are carried in 32-bit containers; MASK24 selects
   the low 24 bits of such a container.  */
typedef unsigned32 unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))
55
/* Element format of an MDMX operand, as decoded by MX_FMT.  */
typedef enum {
  mdmx_ob = 0,          /* OB: packed unsigned bytes.  */
  mdmx_qh = 1           /* QH: packed signed halfwords.  */
} MX_fmt;

/* How the VT operand of an instruction is to be interpreted, as
   decoded by MX_VT.  */
typedef enum {
  sel_elem = 0,         /* One element of VT applied to every lane.  */
  sel_vect = 1,         /* VT used lane by lane.  */
  sel_imm = 2           /* The VT field itself is the operand.  */
} VT_select;
66
/* Saturation limits of the packed element types.  */
#define OB_MAX ((unsigned8)0xFF)
#define QH_MIN ((signed16)0x8000)
#define QH_MAX ((signed16)0x7FFF)

/* Saturate X to the OB range (upper bound only) or to the QH range
   (both bounds).  X is expanded more than once, so it must not have
   side effects.  */
#define OB_CLAMP(x) ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x) ((signed16)((x) < QH_MIN ? QH_MIN : \
                     ((x) > QH_MAX ? QH_MAX : (x))))
74
/* Decode a format/select field.  Bit 0 picks the element format
   (clear for OB, set for QH).  Bit 4 clear means element select; with
   bit 4 set, bit 3 clear means vector select and bit 3 set means
   immediate select.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel) (((fmtsel) & 0x10) == 0 ? sel_elem : \
                       (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))
78
/* Fetch the element of V chosen by FMTSEL.  The masked field (bits
   2-3 for QH, bits 1-3 for OB) shifted left by two yields a bit
   offset of 16 (QH) or 8 (OB) times the element index.  */
#define QH_ELEM(v,fmtsel) \
        ((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
        ((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
83
84
85 typedef signed16 (*QH_FUNC)(signed16, signed16);
86 typedef unsigned8 (*OB_FUNC)(unsigned8, unsigned8);
87
88 /* vectorized logical operators */
89
90 static signed16
91 AndQH(signed16 ts, signed16 tt)
92 {
93 return (signed16)((unsigned16)ts & (unsigned16)tt);
94 }
95
96 static unsigned8
97 AndOB(unsigned8 ts, unsigned8 tt)
98 {
99 return ts & tt;
100 }
101
102 static signed16
103 NorQH(signed16 ts, signed16 tt)
104 {
105 return (signed16)(((unsigned16)ts | (unsigned16)tt) ^ 0xFFFF);
106 }
107
108 static unsigned8
109 NorOB(unsigned8 ts, unsigned8 tt)
110 {
111 return (ts | tt) ^ 0xFF;
112 }
113
114 static signed16
115 OrQH(signed16 ts, signed16 tt)
116 {
117 return (signed16)((unsigned16)ts | (unsigned16)tt);
118 }
119
120 static unsigned8
121 OrOB(unsigned8 ts, unsigned8 tt)
122 {
123 return ts | tt;
124 }
125
126 static signed16
127 XorQH(signed16 ts, signed16 tt)
128 {
129 return (signed16)((unsigned16)ts ^ (unsigned16)tt);
130 }
131
132 static unsigned8
133 XorOB(unsigned8 ts, unsigned8 tt)
134 {
135 return ts ^ tt;
136 }
137
138 static signed16
139 SLLQH(signed16 ts, signed16 tt)
140 {
141 unsigned32 s = (unsigned32)tt & 0xF;
142 return (signed16)(((unsigned32)ts << s) & 0xFFFF);
143 }
144
145 static unsigned8
146 SLLOB(unsigned8 ts, unsigned8 tt)
147 {
148 unsigned32 s = tt & 0x7;
149 return (ts << s) & 0xFF;
150 }
151
152 static signed16
153 SRLQH(signed16 ts, signed16 tt)
154 {
155 unsigned32 s = (unsigned32)tt & 0xF;
156 return (signed16)((unsigned16)ts >> s);
157 }
158
159 static unsigned8
160 SRLOB(unsigned8 ts, unsigned8 tt)
161 {
162 unsigned32 s = tt & 0x7;
163 return ts >> s;
164 }
165
166
167 /* Vectorized arithmetic operators. */
168
169 static signed16
170 AddQH(signed16 ts, signed16 tt)
171 {
172 signed32 t = (signed32)ts + (signed32)tt;
173 return QH_CLAMP(t);
174 }
175
176 static unsigned8
177 AddOB(unsigned8 ts, unsigned8 tt)
178 {
179 unsigned32 t = (unsigned32)ts + (unsigned32)tt;
180 return OB_CLAMP(t);
181 }
182
183 static signed16
184 SubQH(signed16 ts, signed16 tt)
185 {
186 signed32 t = (signed32)ts - (signed32)tt;
187 return QH_CLAMP(t);
188 }
189
190 static unsigned8
191 SubOB(unsigned8 ts, unsigned8 tt)
192 {
193 signed32 t;
194 t = (signed32)ts - (signed32)tt;
195 if (t < 0)
196 t = 0;
197 return (unsigned8)t;
198 }
199
200 static signed16
201 MinQH(signed16 ts, signed16 tt)
202 {
203 return (ts < tt ? ts : tt);
204 }
205
206 static unsigned8
207 MinOB(unsigned8 ts, unsigned8 tt)
208 {
209 return (ts < tt ? ts : tt);
210 }
211
212 static signed16
213 MaxQH(signed16 ts, signed16 tt)
214 {
215 return (ts > tt ? ts : tt);
216 }
217
218 static unsigned8
219 MaxOB(unsigned8 ts, unsigned8 tt)
220 {
221 return (ts > tt ? ts : tt);
222 }
223
224 static signed16
225 MulQH(signed16 ts, signed16 tt)
226 {
227 signed32 t = (signed32)ts * (signed32)tt;
228 return QH_CLAMP(t);
229 }
230
231 static unsigned8
232 MulOB(unsigned8 ts, unsigned8 tt)
233 {
234 unsigned32 t = (unsigned32)ts * (unsigned32)tt;
235 return OB_CLAMP(t);
236 }
237
238 /* "msgn" and "sra" are defined only for QH format. */
239
240 static signed16
241 MsgnQH(signed16 ts, signed16 tt)
242 {
243 signed16 t;
244 if (ts < 0)
245 t = (tt == QH_MIN ? QH_MAX : -tt);
246 else if (ts == 0)
247 t = 0;
248 else
249 t = tt;
250 return t;
251 }
252
253 static signed16
254 SRAQH(signed16 ts, signed16 tt)
255 {
256 unsigned32 s = (unsigned32)tt & 0xF;
257 return (signed16)((signed32)ts >> s);
258 }
259
260
261 /* "pabsdiff" and "pavg" are defined only for OB format. */
262
263 static unsigned8
264 AbsDiffOB(unsigned8 ts, unsigned8 tt)
265 {
266 return (ts >= tt ? ts - tt : tt - ts);
267 }
268
269 static unsigned8
270 AvgOB(unsigned8 ts, unsigned8 tt)
271 {
272 return ((unsigned32)ts + (unsigned32)tt + 1) >> 1;
273 }
274
275
276 /* Dispatch tables for operations that update a CPR. */
277
278 static const QH_FUNC qh_func[] = {
279 AndQH, NorQH, OrQH, XorQH, SLLQH, SRLQH,
280 AddQH, SubQH, MinQH, MaxQH,
281 MulQH, MsgnQH, SRAQH, NULL, NULL
282 };
283
284 static const OB_FUNC ob_func[] = {
285 AndOB, NorOB, OrOB, XorOB, SLLOB, SRLOB,
286 AddOB, SubOB, MinOB, MaxOB,
287 MulOB, NULL, NULL, AbsDiffOB, AvgOB
288 };
289
290 /* Auxiliary functions for CPR updates. */
291
292 /* Vector mapping for QH format. */
293 static unsigned64
294 qh_vector_op(unsigned64 v1, unsigned64 v2, QH_FUNC func)
295 {
296 unsigned64 result = 0;
297 int i;
298 signed16 h, h1, h2;
299
300 for (i = 0; i < 64; i += 16)
301 {
302 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
303 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
304 h = (*func)(h1, h2);
305 result |= ((unsigned64)((unsigned16)h) << i);
306 }
307 return result;
308 }
309
310 static unsigned64
311 qh_map_op(unsigned64 v1, signed16 h2, QH_FUNC func)
312 {
313 unsigned64 result = 0;
314 int i;
315 signed16 h, h1;
316
317 for (i = 0; i < 64; i += 16)
318 {
319 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
320 h = (*func)(h1, h2);
321 result |= ((unsigned64)((unsigned16)h) << i);
322 }
323 return result;
324 }
325
326
327 /* Vector operations for OB format. */
328
329 static unsigned64
330 ob_vector_op(unsigned64 v1, unsigned64 v2, OB_FUNC func)
331 {
332 unsigned64 result = 0;
333 int i;
334 unsigned8 b, b1, b2;
335
336 for (i = 0; i < 64; i += 8)
337 {
338 b1 = v1 & 0xFF; v1 >>= 8;
339 b2 = v2 & 0xFF; v2 >>= 8;
340 b = (*func)(b1, b2);
341 result |= ((unsigned64)b << i);
342 }
343 return result;
344 }
345
346 static unsigned64
347 ob_map_op(unsigned64 v1, unsigned8 b2, OB_FUNC func)
348 {
349 unsigned64 result = 0;
350 int i;
351 unsigned8 b, b1;
352
353 for (i = 0; i < 64; i += 8)
354 {
355 b1 = v1 & 0xFF; v1 >>= 8;
356 b = (*func)(b1, b2);
357 result |= ((unsigned64)b << i);
358 }
359 return result;
360 }
361
362
363 /* Primary entry for operations that update CPRs. */
364 unsigned64
365 mdmx_cpr_op(sim_cpu *cpu,
366 address_word cia,
367 int op,
368 unsigned64 op1,
369 int vt,
370 MX_fmtsel fmtsel)
371 {
372 unsigned64 op2;
373 unsigned64 result = 0;
374
375 switch (MX_FMT (fmtsel))
376 {
377 case mdmx_qh:
378 switch (MX_VT (fmtsel))
379 {
380 case sel_elem:
381 op2 = ValueFPR(vt, fmt_mdmx);
382 result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
383 break;
384 case sel_vect:
385 result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
386 break;
387 case sel_imm:
388 result = qh_map_op(op1, vt, qh_func[op]);
389 break;
390 }
391 break;
392 case mdmx_ob:
393 switch (MX_VT (fmtsel))
394 {
395 case sel_elem:
396 op2 = ValueFPR(vt, fmt_mdmx);
397 result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
398 break;
399 case sel_vect:
400 result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
401 break;
402 case sel_imm:
403 result = ob_map_op(op1, vt, ob_func[op]);
404 break;
405 }
406 break;
407 default:
408 Unpredictable ();
409 }
410
411 return result;
412 }
413
414
415 /* Operations that update CCs */
416
417 static void
418 qh_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
419 {
420 int i;
421 signed16 h1, h2;
422 int boolean;
423
424 for (i = 0; i < 4; i++)
425 {
426 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
427 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
428 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
429 ((cond & MX_C_LT) && (h1 < h2));
430 SETFCC(i, boolean);
431 }
432 }
433
434 static void
435 qh_map_test(sim_cpu *cpu, unsigned64 v1, signed16 h2, int cond)
436 {
437 int i;
438 signed16 h1;
439 int boolean;
440
441 for (i = 0; i < 4; i++)
442 {
443 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
444 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
445 ((cond & MX_C_LT) && (h1 < h2));
446 SETFCC(i, boolean);
447 }
448 }
449
450 static void
451 ob_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
452 {
453 int i;
454 unsigned8 b1, b2;
455 int boolean;
456
457 for (i = 0; i < 8; i++)
458 {
459 b1 = v1 & 0xFF; v1 >>= 8;
460 b2 = v2 & 0xFF; v2 >>= 8;
461 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
462 ((cond & MX_C_LT) && (b1 < b2));
463 SETFCC(i, boolean);
464 }
465 }
466
467 static void
468 ob_map_test(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int cond)
469 {
470 int i;
471 unsigned8 b1;
472 int boolean;
473
474 for (i = 0; i < 8; i++)
475 {
476 b1 = (unsigned8)(v1 & 0xFF); v1 >>= 8;
477 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
478 ((cond & MX_C_LT) && (b1 < b2));
479 SETFCC(i, boolean);
480 }
481 }
482
483
484 void
485 mdmx_cc_op(sim_cpu *cpu,
486 address_word cia,
487 int cond,
488 unsigned64 v1,
489 int vt,
490 MX_fmtsel fmtsel)
491 {
492 unsigned64 op2;
493
494 switch (MX_FMT (fmtsel))
495 {
496 case mdmx_qh:
497 switch (MX_VT (fmtsel))
498 {
499 case sel_elem:
500 op2 = ValueFPR(vt, fmt_mdmx);
501 qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
502 break;
503 case sel_vect:
504 qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
505 break;
506 case sel_imm:
507 qh_map_test(cpu, v1, vt, cond);
508 break;
509 }
510 break;
511 case mdmx_ob:
512 switch (MX_VT (fmtsel))
513 {
514 case sel_elem:
515 op2 = ValueFPR(vt, fmt_mdmx);
516 ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
517 break;
518 case sel_vect:
519 ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
520 break;
521 case sel_imm:
522 ob_map_test(cpu, v1, vt, cond);
523 break;
524 }
525 break;
526 default:
527 Unpredictable ();
528 }
529 }
530
531
532 /* Pick operations. */
533
534 static unsigned64
535 qh_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
536 {
537 unsigned64 result = 0;
538 int i, s;
539 unsigned16 h;
540
541 s = 0;
542 for (i = 0; i < 4; i++)
543 {
544 h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
545 v1 >>= 16; v2 >>= 16;
546 result |= ((unsigned64)h << s);
547 s += 16;
548 }
549 return result;
550 }
551
552 static unsigned64
553 qh_map_pick(sim_cpu *cpu, unsigned64 v1, signed16 h2, int tf)
554 {
555 unsigned64 result = 0;
556 int i, s;
557 unsigned16 h;
558
559 s = 0;
560 for (i = 0; i < 4; i++)
561 {
562 h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (unsigned16)h2;
563 v1 >>= 16;
564 result |= ((unsigned64)h << s);
565 s += 16;
566 }
567 return result;
568 }
569
570 static unsigned64
571 ob_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
572 {
573 unsigned64 result = 0;
574 int i, s;
575 unsigned8 b;
576
577 s = 0;
578 for (i = 0; i < 8; i++)
579 {
580 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
581 v1 >>= 8; v2 >>= 8;
582 result |= ((unsigned64)b << s);
583 s += 8;
584 }
585 return result;
586 }
587
588 static unsigned64
589 ob_map_pick(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int tf)
590 {
591 unsigned64 result = 0;
592 int i, s;
593 unsigned8 b;
594
595 s = 0;
596 for (i = 0; i < 8; i++)
597 {
598 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
599 v1 >>= 8;
600 result |= ((unsigned64)b << s);
601 s += 8;
602 }
603 return result;
604 }
605
606
607 unsigned64
608 mdmx_pick_op(sim_cpu *cpu,
609 address_word cia,
610 int tf,
611 unsigned64 v1,
612 int vt,
613 MX_fmtsel fmtsel)
614 {
615 unsigned64 result = 0;
616 unsigned64 op2;
617
618 switch (MX_FMT (fmtsel))
619 {
620 case mdmx_qh:
621 switch (MX_VT (fmtsel))
622 {
623 case sel_elem:
624 op2 = ValueFPR(vt, fmt_mdmx);
625 result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
626 break;
627 case sel_vect:
628 result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
629 break;
630 case sel_imm:
631 result = qh_map_pick(cpu, v1, vt, tf);
632 break;
633 }
634 break;
635 case mdmx_ob:
636 switch (MX_VT (fmtsel))
637 {
638 case sel_elem:
639 op2 = ValueFPR(vt, fmt_mdmx);
640 result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
641 break;
642 case sel_vect:
643 result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
644 break;
645 case sel_imm:
646 result = ob_map_pick(cpu, v1, vt, tf);
647 break;
648 }
649 break;
650 default:
651 Unpredictable ();
652 }
653 return result;
654 }
655
656
657 /* Accumulators. */
658
659 typedef void (*QH_ACC)(signed48 *a, signed16 ts, signed16 tt);
660
661 static void
662 AccAddAQH(signed48 *a, signed16 ts, signed16 tt)
663 {
664 *a += (signed48)ts + (signed48)tt;
665 }
666
667 static void
668 AccAddLQH(signed48 *a, signed16 ts, signed16 tt)
669 {
670 *a = (signed48)ts + (signed48)tt;
671 }
672
673 static void
674 AccMulAQH(signed48 *a, signed16 ts, signed16 tt)
675 {
676 *a += (signed48)ts * (signed48)tt;
677 }
678
679 static void
680 AccMulLQH(signed48 *a, signed16 ts, signed16 tt)
681 {
682 *a = (signed48)ts * (signed48)tt;
683 }
684
685 static void
686 SubMulAQH(signed48 *a, signed16 ts, signed16 tt)
687 {
688 *a -= (signed48)ts * (signed48)tt;
689 }
690
691 static void
692 SubMulLQH(signed48 *a, signed16 ts, signed16 tt)
693 {
694 *a = -((signed48)ts * (signed48)tt);
695 }
696
697 static void
698 AccSubAQH(signed48 *a, signed16 ts, signed16 tt)
699 {
700 *a += (signed48)ts - (signed48)tt;
701 }
702
703 static void
704 AccSubLQH(signed48 *a, signed16 ts, signed16 tt)
705 {
706 *a = (signed48)ts - (signed48)tt;
707 }
708
709
710 typedef void (*OB_ACC)(signed24 *acc, unsigned8 ts, unsigned8 tt);
711
712 static void
713 AccAddAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
714 {
715 *a += (signed24)ts + (signed24)tt;
716 }
717
718 static void
719 AccAddLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
720 {
721 *a = (signed24)ts + (signed24)tt;
722 }
723
724 static void
725 AccMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
726 {
727 *a += (signed24)ts * (signed24)tt;
728 }
729
730 static void
731 AccMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
732 {
733 *a = (signed24)ts * (signed24)tt;
734 }
735
736 static void
737 SubMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
738 {
739 *a -= (signed24)ts * (signed24)tt;
740 }
741
742 static void
743 SubMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
744 {
745 *a = -((signed24)ts * (signed24)tt);
746 }
747
748 static void
749 AccSubAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
750 {
751 *a += (signed24)ts - (signed24)tt;
752 }
753
754 static void
755 AccSubLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
756 {
757 *a = (signed24)ts - (signed24)tt;
758 }
759
760 static void
761 AccAbsDiffOB(signed24 *a, unsigned8 ts, unsigned8 tt)
762 {
763 unsigned8 t = (ts >= tt ? ts - tt : tt - ts);
764 *a += (signed24)t;
765 }
766
767
768 /* Dispatch tables for operations that update a CPR. */
769
770 static const QH_ACC qh_acc[] = {
771 AccAddAQH, AccAddAQH, AccMulAQH, AccMulLQH,
772 SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
773 NULL
774 };
775
776 static const OB_ACC ob_acc[] = {
777 AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
778 SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
779 AccAbsDiffOB
780 };
781
782
783 static void
784 qh_vector_acc(signed48 a[], unsigned64 v1, unsigned64 v2, QH_ACC acc)
785 {
786 int i;
787 signed16 h1, h2;
788
789 for (i = 0; i < 4; i++)
790 {
791 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
792 h2 = (signed16)(v2 & 0xFFFF); v2 >>= 16;
793 (*acc)(&a[i], h1, h2);
794 }
795 }
796
797 static void
798 qh_map_acc(signed48 a[], unsigned64 v1, signed16 h2, QH_ACC acc)
799 {
800 int i;
801 signed16 h1;
802
803 for (i = 0; i < 4; i++)
804 {
805 h1 = (signed16)(v1 & 0xFFFF); v1 >>= 16;
806 (*acc)(&a[i], h1, h2);
807 }
808 }
809
810 static void
811 ob_vector_acc(signed24 a[], unsigned64 v1, unsigned64 v2, OB_ACC acc)
812 {
813 int i;
814 unsigned8 b1, b2;
815
816 for (i = 0; i < 8; i++)
817 {
818 b1 = v1 & 0xFF; v1 >>= 8;
819 b2 = v2 & 0xFF; v2 >>= 8;
820 (*acc)(&a[i], b1, b2);
821 }
822 }
823
824 static void
825 ob_map_acc(signed24 a[], unsigned64 v1, unsigned8 b2, OB_ACC acc)
826 {
827 int i;
828 unsigned8 b1;
829
830 for (i = 0; i < 8; i++)
831 {
832 b1 = v1 & 0xFF; v1 >>= 8;
833 (*acc)(&a[i], b1, b2);
834 }
835 }
836
837
838 /* Primary entry for operations that accumulate */
839 void
840 mdmx_acc_op(sim_cpu *cpu,
841 address_word cia,
842 int op,
843 unsigned64 op1,
844 int vt,
845 MX_fmtsel fmtsel)
846 {
847 unsigned64 op2;
848
849 switch (MX_FMT (fmtsel))
850 {
851 case mdmx_qh:
852 switch (MX_VT (fmtsel))
853 {
854 case sel_elem:
855 op2 = ValueFPR(vt, fmt_mdmx);
856 qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
857 break;
858 case sel_vect:
859 qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
860 break;
861 case sel_imm:
862 qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
863 break;
864 }
865 break;
866 case mdmx_ob:
867 switch (MX_VT (fmtsel))
868 {
869 case sel_elem:
870 op2 = ValueFPR(vt, fmt_mdmx);
871 ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
872 break;
873 case sel_vect:
874 ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
875 break;
876 case sel_imm:
877 ob_map_acc(ACC.ob, op1, op2, ob_acc[op]);
878 break;
879 }
880 break;
881 default:
882 Unpredictable ();
883 }
884 }
885
886
887 /* Reading and writing accumulator (no conversion). */
888
889 unsigned64
890 mdmx_rac_op(sim_cpu *cpu,
891 address_word cia,
892 int op,
893 int fmt)
894 {
895 unsigned64 result;
896 unsigned int shift;
897 int i;
898
899 shift = op; /* L = 00, M = 01, H = 10. */
900 result = 0;
901
902 switch (fmt)
903 {
904 case MX_FMT_QH:
905 shift <<= 4; /* 16 bits per element. */
906 for (i = 3; i >= 0; --i)
907 {
908 result <<= 16;
909 result |= ((ACC.qh[i] >> shift) & 0xFFFF);
910 }
911 break;
912 case MX_FMT_OB:
913 shift <<= 3; /* 8 bits per element. */
914 for (i = 7; i >= 0; --i)
915 {
916 result <<= 8;
917 result |= ((ACC.ob[i] >> shift) & 0xFF);
918 }
919 break;
920 default:
921 Unpredictable ();
922 }
923 return result;
924 }
925
926 void
927 mdmx_wacl(sim_cpu *cpu,
928 address_word cia,
929 int fmt,
930 unsigned64 vs,
931 unsigned64 vt)
932 {
933 int i;
934
935 switch (fmt)
936 {
937 case MX_FMT_QH:
938 for (i = 0; i < 4; i++)
939 {
940 signed32 s = (signed16)(vs & 0xFFFF);
941 ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
942 vs >>= 16; vt >>= 16;
943 }
944 break;
945 case MX_FMT_OB:
946 for (i = 0; i < 8; i++)
947 {
948 signed16 s = (signed8)(vs & 0xFF);
949 ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
950 vs >>= 8; vt >>= 8;
951 }
952 break;
953 default:
954 Unpredictable ();
955 }
956 }
957
958 void
959 mdmx_wach(sim_cpu *cpu,
960 address_word cia,
961 int fmt,
962 unsigned64 vs)
963 {
964 int i;
965
966 switch (fmt)
967 {
968 case MX_FMT_QH:
969 for (i = 0; i < 4; i++)
970 {
971 signed32 s = (signed16)(vs & 0xFFFF);
972 ACC.qh[i] &= ~((signed48)0xFFFF << 32);
973 ACC.qh[i] |= ((signed48)s << 32);
974 vs >>= 16;
975 }
976 break;
977 case MX_FMT_OB:
978 for (i = 0; i < 8; i++)
979 {
980 ACC.ob[i] &= ~((signed24)0xFF << 16);
981 ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
982 vs >>= 8;
983 }
984 break;
985 default:
986 Unpredictable ();
987 }
988 }
989
990
991 /* Reading and writing accumulator (rounding conversions).
992 Enumerating function guarantees s >= 0 for QH ops. */
993
/* Signature of a QH rounding routine: reduces accumulator lane A by
   a right shift of S bits to a 16-bit result.  */
typedef signed16 (*QH_ROUND)(signed48 a, signed16 s);

/* QH_BIT(n) is a value with only bit N set; QH_ONES(n) is a value
   with the N low bits set.  Both are computed in the 48-bit
   container type.  */
#define QH_BIT(n) ((unsigned48)1 << (n))
#define QH_ONES(n) (((unsigned48)1 << (n))-1)
998
999 static signed16
1000 RNASQH(signed48 a, signed16 s)
1001 {
1002 signed48 t;
1003 signed16 result = 0;
1004
1005 if (s > 48)
1006 result = 0;
1007 else
1008 {
1009 t = (a >> s);
1010 if ((a & QH_BIT(47)) == 0)
1011 {
1012 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1013 t++;
1014 if (t > QH_MAX)
1015 t = QH_MAX;
1016 }
1017 else
1018 {
1019 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1020 {
1021 if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1022 t++;
1023 }
1024 if (t < QH_MIN)
1025 t = QH_MIN;
1026 }
1027 result = (signed16)t;
1028 }
1029 return result;
1030 }
1031
1032 static signed16
1033 RNAUQH(signed48 a, signed16 s)
1034 {
1035 unsigned48 t;
1036 signed16 result;
1037
1038 if (s > 48)
1039 result = 0;
1040 else if (s == 48)
1041 result = ((unsigned48)a & MASK48) >> 47;
1042 else
1043 {
1044 t = ((unsigned48)a & MASK48) >> s;
1045 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1046 t++;
1047 if (t > 0xFFFF)
1048 t = 0xFFFF;
1049 result = (signed16)t;
1050 }
1051 return result;
1052 }
1053
1054 static signed16
1055 RNESQH(signed48 a, signed16 s)
1056 {
1057 signed48 t;
1058 signed16 result = 0;
1059
1060 if (s > 47)
1061 result = 0;
1062 else
1063 {
1064 t = (a >> s);
1065 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1066 {
1067 if (s == 1 || (a & QH_ONES(s-1)) == 0)
1068 t += t & 1;
1069 else
1070 t += 1;
1071 }
1072 if ((a & QH_BIT(47)) == 0)
1073 {
1074 if (t > QH_MAX)
1075 t = QH_MAX;
1076 }
1077 else
1078 {
1079 if (t < QH_MIN)
1080 t = QH_MIN;
1081 }
1082 result = (signed16)t;
1083 }
1084 return result;
1085 }
1086
1087 static signed16
1088 RNEUQH(signed48 a, signed16 s)
1089 {
1090 unsigned48 t;
1091 signed16 result;
1092
1093 if (s > 48)
1094 result = 0;
1095 else if (s == 48)
1096 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1097 else
1098 {
1099 t = ((unsigned48)a & MASK48) >> s;
1100 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1101 {
1102 if (s > 1 && (a & QH_ONES(s-1)) != 0)
1103 t++;
1104 else
1105 t += t & 1;
1106 }
1107 if (t > 0xFFFF)
1108 t = 0xFFFF;
1109 result = (signed16)t;
1110 }
1111 return result;
1112 }
1113
1114 static signed16
1115 RZSQH(signed48 a, signed16 s)
1116 {
1117 signed48 t;
1118 signed16 result = 0;
1119
1120 if (s > 47)
1121 result = 0;
1122 else
1123 {
1124 t = (a >> s);
1125 if ((a & QH_BIT(47)) == 0)
1126 {
1127 if (t > QH_MAX)
1128 t = QH_MAX;
1129 }
1130 else
1131 {
1132 if (t < QH_MIN)
1133 t = QH_MIN;
1134 }
1135 result = (signed16)t;
1136 }
1137 return result;
1138 }
1139
1140 static signed16
1141 RZUQH(signed48 a, signed16 s)
1142 {
1143 unsigned48 t;
1144 signed16 result = 0;
1145
1146 if (s > 48)
1147 result = 0;
1148 else if (s == 48)
1149 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1150 else
1151 {
1152 t = ((unsigned48)a & MASK48) >> s;
1153 if (t > 0xFFFF)
1154 t = 0xFFFF;
1155 result = (signed16)t;
1156 }
1157 return result;
1158 }
1159
1160
/* Signature of an OB rounding routine: reduces accumulator lane A by
   a right shift of S bits to an 8-bit result.  */
typedef unsigned8 (*OB_ROUND)(signed24 a, unsigned8 s);

/* OB_BIT(n) is a value with only bit N set; OB_ONES(n) is a value
   with the N low bits set.  Both are computed in the 24-bit
   container type.  */
#define OB_BIT(n) ((unsigned24)1 << (n))
#define OB_ONES(n) (((unsigned24)1 << (n))-1)
1165
1166 static unsigned8
1167 RNAUOB(signed24 a, unsigned8 s)
1168 {
1169 unsigned8 result;
1170 unsigned24 t;
1171
1172 if (s > 24)
1173 result = 0;
1174 else if (s == 24)
1175 result = ((unsigned24)a & MASK24) >> 23;
1176 else
1177 {
1178 t = ((unsigned24)a & MASK24) >> s;
1179 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1180 t ++;
1181 result = OB_CLAMP(t);
1182 }
1183 return result;
1184 }
1185
1186 static unsigned8
1187 RNEUOB(signed24 a, unsigned8 s)
1188 {
1189 unsigned8 result;
1190 unsigned24 t;
1191
1192 if (s > 24)
1193 result = 0;
1194 else if (s == 24)
1195 result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1196 else
1197 {
1198 t = ((unsigned24)a & MASK24) >> s;
1199 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1200 {
1201 if (s > 1 && (a & OB_ONES(s-1)) != 0)
1202 t++;
1203 else
1204 t += t & 1;
1205 }
1206 result = OB_CLAMP(t);
1207 }
1208 return result;
1209 }
1210
1211 static unsigned8
1212 RZUOB(signed24 a, unsigned8 s)
1213 {
1214 unsigned8 result;
1215 unsigned24 t;
1216
1217 if (s >= 24)
1218 result = 0;
1219 else
1220 {
1221 t = ((unsigned24)a & MASK24) >> s;
1222 result = OB_CLAMP(t);
1223 }
1224 return result;
1225 }
1226
1227
1228 static const QH_ROUND qh_round[] = {
1229 RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
1230 };
1231
1232 static const OB_ROUND ob_round[] = {
1233 NULL, RNAUOB, NULL, RNEUOB, NULL, RZUOB
1234 };
1235
1236
1237 static unsigned64
1238 qh_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, QH_ROUND round)
1239 {
1240 unsigned64 result = 0;
1241 int i, s;
1242 signed16 h, h2;
1243
1244 s = 0;
1245 for (i = 0; i < 4; i++)
1246 {
1247 h2 = (signed16)(v2 & 0xFFFF);
1248 if (h2 >= 0)
1249 h = (*round)(ACC.qh[i], h2);
1250 else
1251 {
1252 UnpredictableResult ();
1253 h = 0xdead;
1254 }
1255 v2 >>= 16;
1256 result |= ((unsigned64)((unsigned16)h) << s);
1257 s += 16;
1258 }
1259 return result;
1260 }
1261
1262 static unsigned64
1263 qh_map_round(sim_cpu *cpu, address_word cia, signed16 h2, QH_ROUND round)
1264 {
1265 unsigned64 result = 0;
1266 int i, s;
1267 signed16 h;
1268
1269 s = 0;
1270 for (i = 0; i < 4; i++)
1271 {
1272 if (h2 >= 0)
1273 h = (*round)(ACC.qh[i], h2);
1274 else
1275 {
1276 UnpredictableResult ();
1277 h = 0xdead;
1278 }
1279 result |= ((unsigned64)((unsigned16)h) << s);
1280 s += 16;
1281 }
1282 return result;
1283 }
1284
1285 static unsigned64
1286 ob_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, OB_ROUND round)
1287 {
1288 unsigned64 result = 0;
1289 int i, s;
1290 unsigned8 b, b2;
1291
1292 s = 0;
1293 for (i = 0; i < 8; i++)
1294 {
1295 b2 = v2 & 0xFF; v2 >>= 8;
1296 b = (*round)(ACC.ob[i], b2);
1297 result |= ((unsigned64)b << s);
1298 s += 8;
1299 }
1300 return result;
1301 }
1302
1303 static unsigned64
1304 ob_map_round(sim_cpu *cpu, address_word cia, unsigned8 b2, OB_ROUND round)
1305 {
1306 unsigned64 result = 0;
1307 int i, s;
1308 unsigned8 b;
1309
1310 s = 0;
1311 for (i = 0; i < 8; i++)
1312 {
1313 b = (*round)(ACC.ob[i], b2);
1314 result |= ((unsigned64)b << s);
1315 s += 8;
1316 }
1317 return result;
1318 }
1319
1320
1321 unsigned64
1322 mdmx_round_op(sim_cpu *cpu,
1323 address_word cia,
1324 int rm,
1325 int vt,
1326 MX_fmtsel fmtsel)
1327 {
1328 unsigned64 op2;
1329 unsigned64 result = 0;
1330
1331 switch (MX_FMT (fmtsel))
1332 {
1333 case mdmx_qh:
1334 switch (MX_VT (fmtsel))
1335 {
1336 case sel_elem:
1337 op2 = ValueFPR(vt, fmt_mdmx);
1338 result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1339 break;
1340 case sel_vect:
1341 op2 = ValueFPR(vt, fmt_mdmx);
1342 result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1343 break;
1344 case sel_imm:
1345 result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1346 break;
1347 }
1348 break;
1349 case mdmx_ob:
1350 switch (MX_VT (fmtsel))
1351 {
1352 case sel_elem:
1353 op2 = ValueFPR(vt, fmt_mdmx);
1354 result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1355 break;
1356 case sel_vect:
1357 op2 = ValueFPR(vt, fmt_mdmx);
1358 result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1359 break;
1360 case sel_imm:
1361 result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1362 break;
1363 }
1364 break;
1365 default:
1366 Unpredictable ();
1367 }
1368
1369 return result;
1370 }
1371
1372
1373 /* Shuffle operation. */
1374
/* One output lane of a shuffle: which input supplies it and which
   element of that input.  "vs" and "vt" name the two operands; "ss"
   yields a byte filled with the sign bit of the indexed first-operand
   element.  */
typedef struct {
  enum {vs, ss, vt} source;
  unsigned int index;
} sh_map;

/* Zero-filled row used for reserved encodings; equivalent to an
   empty initializer, i.e. every lane reads {vs,0}.  */
#define SH_RSVD {{vs,0}}

/* OB shuffles, eight output lanes per row, lane 0 first.
   MDMX 2.0 encodings (3-4, 6-7).
   vr5400 encoding (5), otherwise.  */
static const sh_map ob_shuffle[][8] = {
  /* 0: RSVD */
  SH_RSVD,
  /* 1: RSVD */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}},
  /* 2: RSVD */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}},
  /* 3: upsl */
  {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}},
  /* 4: pach */
  {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}},
  /* 5: pacl */
  {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}},
  /* 6: mixh */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}},
  /* 7: mixl */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}
};

/* QH shuffles, four output lanes per row, lane 0 first.  */
static const sh_map qh_shuffle[][4] = {
  /* 0: mixh */
  {{vt,2}, {vs,2}, {vt,3}, {vs,3}},
  /* 1: mixl */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}},
  /* 2: pach */
  {{vt,1}, {vt,3}, {vs,1}, {vs,3}},
  /* 3: RSVD */
  SH_RSVD,
  /* 4: bfla */
  {{vt,1}, {vs,0}, {vt,3}, {vs,2}},
  /* 5: RSVD */
  SH_RSVD,
  /* 6: repa */
  {{vt,2}, {vt,3}, {vs,2}, {vs,3}},
  /* 7: repb */
  {{vt,0}, {vt,1}, {vs,0}, {vs,1}}
};
1403
1404
1405 unsigned64
1406 mdmx_shuffle(sim_cpu *cpu,
1407 address_word cia,
1408 int shop,
1409 unsigned64 op1,
1410 unsigned64 op2)
1411 {
1412 unsigned64 result = 0;
1413 int i, s;
1414 int op;
1415
1416 if ((shop & 0x3) == 0x1) /* QH format. */
1417 {
1418 op = shop >> 2;
1419 s = 0;
1420 for (i = 0; i < 4; i++)
1421 {
1422 unsigned64 v;
1423
1424 switch (qh_shuffle[op][i].source)
1425 {
1426 case vs:
1427 v = op1;
1428 break;
1429 case vt:
1430 v = op2;
1431 break;
1432 default:
1433 Unpredictable ();
1434 v = 0;
1435 }
1436 result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1437 s += 16;
1438 }
1439 }
1440 else if ((shop & 0x1) == 0x0) /* OB format. */
1441 {
1442 op = shop >> 1;
1443 s = 0;
1444 for (i = 0; i < 8; i++)
1445 {
1446 unsigned8 b;
1447 unsigned int ishift = 8*ob_shuffle[op][i].index;
1448
1449 switch (ob_shuffle[op][i].source)
1450 {
1451 case vs:
1452 b = (op1 >> ishift) & 0xFF;
1453 break;
1454 case ss:
1455 b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1456 break;
1457 case vt:
1458 b = (op2 >> ishift) & 0xFF;
1459 break;
1460 default:
1461 Unpredictable ();
1462 b = 0;
1463 }
1464 result |= ((unsigned64)b << s);
1465 s += 8;
1466 }
1467 }
1468 else
1469 Unpredictable ();
1470
1471 return result;
1472 }
This page took 0.059546 seconds and 4 git commands to generate.