4 // Ref: http://www.sgi.com/MIPS/arch/ISA5/MDMXspec.pdf
6 // Note: For OB instructions, the sel field is deduced by special
7 // handling of the "vt" operand.
9 // of the form $vt[0], then sel is 0000
10 // of the form $vt[1], then sel is 0001
11 // of the form $vt[2], then sel is 0010
12 // of the form $vt[3], then sel is 0011
13 // of the form $vt[4], then sel is 0100
14 // of the form $vt[5], then sel is 0101
15 // of the form $vt[6], then sel is 0110
16 // of the form $vt[7], then sel is 0111
17 // Normal register specifier, then sel is 1011
18 // Constant, then sel is 1111
20 // VecAcc is the Vector Accumulator.
21 // This accumulator is organized as 8X24 bit (192 bit) register.
22 // This accumulator holds only signed values.
25 // Verify that the instruction is valid for the current Architecture
26 // If valid, return the scale (log nr bits) of a vector element
27 // as determined by SEL.
29 :function:::int:get_scale:int sel
32 switch (my_index X STATE_ARCHITECTURE)
42 /* octal byte - ssss0 */
46 /* quad halfword - sss01 */
50 semantic_illegal (CPU_, cia);
54 semantic_illegal (CPU_, cia);
63 // Fetch/Store VALUE in ELEMENT of vector register FPR.
64 // The size of the element is determined by SCALE.
66 :function:::signed:value_vr:int scale, int fpr, int el
68 switch (FPR_STATE[fpr])
70 case fmt_uninterpreted:
71 FPR_STATE[fpr] = fmt_long;
77 sim_io_eprintf (SD, "Vector %d format invalid (PC = 0x%08lx)\n",
79 FPR_STATE[fpr] = fmt_unknown;
85 signed8 value = *A1_8 (&FGR[fpr], 7 - el);
90 signed16 value = *A2_8 (&FGR[fpr], 3 - el);
99 :function:::void:store_vr:int scale, int fpr, int element, signed value
101 switch (FPR_STATE[fpr])
103 case fmt_uninterpreted:
104 FPR_STATE[fpr] = fmt_long;
110 sim_io_eprintf (SD, "Vector %d format invalid (PC = 0x%08lx)\n",
112 FPR_STATE[fpr] = fmt_unknown;
118 *A1_8 (&FGR[fpr], 7 - element) = value;
123 *A2_8 (&FGR[fpr], 3 - element) = value;
132 // Select a value from one of FGR[VT][ELEMENT], VT and FGR[VT][CONST]
135 :function:::unsigned:select_vr:int sel, int vt, int element
139 /* element select - 0xxxx */
140 case 0x00: /* 0 xxx 0 */
148 return value_vr (SD_, 0, vt, sel >> 1);
149 case 0x01: /* 0 xx 01 */
153 return value_vr (SD_, 1, vt, sel >> 2);
154 case 0x03: /* 0 x 011 */
156 return value_vr (SD_, 2, vt, sel >> 3);
157 case 0x07: /* 0 x 111 */
159 return value_vr (SD_, 3, vt, sel >> 4);
161 /* select vector - 10xxx */
162 case 0x16: /* 10 11 0 */
163 return value_vr (SD_, 0, vt, element);
164 case 0x15: /* 10 1 01 */
165 return value_vr (SD_, 1, vt, element);
166 case 0x13: /* 10 011 */
167 return value_vr (SD_, 2, vt, element);
168 case 0x17: /* 10 111 */
169 return value_vr (SD_, 3, vt, element);
171 /* select immediate - 11xxx */
172 case 0x1e: /* 11 11 0 */
173 case 0x1d: /* 11 1 01 */
174 case 0x1b: /* 11 011 */
175 case 0x1f: /* 11 111 */
183 // Saturate (clamp) the signed value to (8 << SCALE) bits.
185 :function:::signed:Clamp:int scale, signed value
191 if (value != (signed8) value)
202 if (value != (signed16) value)
209 return value & 0xffff;
218 // Access a single bit of the floating point CC register.
220 :function:::void:store_cc:int i, int value
225 :function:::int:value_cc:int i
231 // Read/write the accumulator
233 :function:::signed64:value_acc:int scale, int element
239 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 0];
240 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 1] << 8;
241 value |= (signed64) (signed8) CPU->acc [element * 3 + 2] << 16;
244 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 0];
245 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 1] << 8;
246 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 2] << 16;
247 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 3] << 24;
248 value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 4] << 32;
249 value |= (signed64) (signed8) CPU->acc [element * 3 + 5] << 40;
255 :function:::void:store_acc:int scale, int element, signed64 value
260 CPU->acc [element * 3 + 0] = value >> 0;
261 CPU->acc [element * 3 + 1] = value >> 8;
262 CPU->acc [element * 3 + 2] = value >> 16;
265 CPU->acc [element * 3 + 0] = value >> 0;
266 CPU->acc [element * 3 + 1] = value >> 8;
267 CPU->acc [element * 3 + 2] = value >> 16;
268 CPU->acc [element * 3 + 3] = value >> 24;
269 CPU->acc [element * 3 + 4] = value >> 32;
270 CPU->acc [element * 3 + 5] = value >> 40;
278 :%s::::VT:int sel, int vt
282 sprintf (buf, "v%d[%d]", vt, sel);
283 else if (sel == 0x13)
284 sprintf (buf, "v%d", vt);
285 else if (sel == 0x1f)
286 sprintf (buf, "%d", vt);
288 sprintf (buf, "(invalid)");
314 010010,5.SEL,5.VT,5.VS,5.VD,001011::::ADD.fmt
315 "add.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
317 // start-sanitize-vr5400
319 // end-sanitize-vr5400
322 int scale = get_scale (SD_, SEL);
323 for (i = 0; i < (8 >> scale); i++)
324 store_vr (SD_, scale, VD, i,
326 (value_vr (SD_, scale, VS, i)
327 + select_vr (SD_, SEL, VT, i))));
331 // Accumulate Vector Add
// ADDA adds VS[i] + select(VT)[i] into accumulator element i; ADDL (the
// second record) overwrites the accumulator element with that sum instead.
// The bit following VS tells them apart: 0 = accumulate, 1 = load, the same
// convention MULA/MULL, MULS/MULSL and SUBA/SUBL use in this file.
// Neither encoding carries a VD field, so the disassembly prints VS and the
// selected VT operand.
333 010010,5.SEL,5.VT,5.VS,0,0000,110111::::ADDA.fmt
334 "adda.%s<SEL> v<VS>, %s<VT#SEL,VT>"
338 int scale = get_scale (SD_, SEL);
339 for (i = 0; i < (8 >> scale); i++)
340 store_acc (SD_, scale, i,
341 (value_acc (SD_, scale, i)
342 + (signed64) value_vr (SD_, scale, VS, i)
343 + (signed64) select_vr (SD_, SEL, VT, i)));
349 010010,5.SEL,5.VT,5.VS,1,0000,110111::::ADDL.fmt
350 "addl.%s<SEL> v<VS>, %s<VT#SEL,VT>"
354 int scale = get_scale (SD_, SEL);
355 for (i = 0; i < (8 >> scale); i++)
356 store_acc (SD_, scale, i,
357 ((signed64) value_vr (SD_, scale, VS, i)
358 + (signed64) select_vr (SD_, SEL, VT, i)));
363 // Vector align, Constant Alignment
365 :function:::void:ByteAlign:int vd, int imm, int vs, int vt
368 unsigned64 rs = ValueFPR (vs, fmt_long);
369 unsigned64 rt = ValueFPR (vt, fmt_long);
373 /* (vs || vt) [127 - S .. 64 - S] */
377 rd = (MOVED64 (rs, 64 - s, 0, 63, s)
378 | EXTRACTED64 (rt, 63, 64 - s));
382 /* (vs || vt) [63 + S .. S] */
386 rd = (MOVED64 (rs, s, 0, 63, 64 - s)
387 | EXTRACTED64 (rt, 63, s));
389 StoreFPR (vd, fmt_long, rd);
392 010010,00,3.IMM,5.VT,5.VS,5.VD,0110,X,0::::ALNI.fmt
393 "alni.%s<FMT#X> v<VD>, v<VS>, v<VT>, <IMM>"
395 // start-sanitize-vr5400
397 // end-sanitize-vr5400
399 ByteAlign (SD_, VD, IMM, VS, VT);
404 // Vector align, Variable Alignment
406 010010,5.RS,5.VT,5.VS,5.VD,0110,X,1::::ALNV.fmt
407 "alnv.%s<FMT#X> v<VD>, v<VS>, v<VT>, r<RS>"
410 ByteAlign (SD_, VD, GPR[RS], VS, VT);
417 010010,5.SEL,5.VT,5.VS,5.VD,001100::::AND.fmt
418 "and.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
420 // start-sanitize-vr5400
422 // end-sanitize-vr5400
425 int scale = get_scale (SD_, SEL);
426 for (i = 0; i < (8 >> scale); i++)
427 store_vr (SD_, scale, VD, i,
428 (value_vr (SD_, scale, VS, i)
429 & select_vr (SD_, SEL, VT, i)));
434 // Vector Compare Equal.
// Compares each element of VS with the selected VT operand for equality.
// The mnemonic is printed in lower case, like c.le and c.lt.
437 010010,5.SEL,5.VT,5.VS,00000,000001::::C.EQ.fmt
438 "c.eq.%s<SEL> v<VS>, %s<VT#SEL,VT>"
440 // start-sanitize-vr5400
442 // end-sanitize-vr5400
445 int scale = get_scale (SD_, SEL);
446 for (i = 0; i < (8 >> scale); i++)
448 (value_vr (SD_, scale, VS, i)
449 == select_vr (SD_, SEL, VT, i)));
454 // Vector Compare Less Than or Equal.
456 010010,5.SEL,5.VT,5.VS,00000,000101::::C.LE.fmt
457 "c.le.%s<SEL> v<VS>, %s<VT#SEL,VT>"
459 // start-sanitize-vr5400
461 // end-sanitize-vr5400
464 int scale = get_scale (SD_, SEL);
465 for (i = 0; i < (8 >> scale); i++)
467 (value_vr (SD_, scale, VS, i)
468 <= select_vr (SD_, SEL, VT, i)));
473 // Vector Compare Less Than.
475 010010,5.SEL,5.VT,5.VS,00000,000100::::C.LT.fmt
476 "c.lt.%s<SEL> v<VS>, %s<VT#SEL,VT>"
478 // start-sanitize-vr5400
480 // end-sanitize-vr5400
483 int scale = get_scale (SD_, SEL);
484 for (i = 0; i < (8 >> scale); i++)
486 (value_vr (SD_, scale, VS, i)
487 < select_vr (SD_, SEL, VT, i)));
494 :function:::signed:Max:int scale, signed l, signed r
502 010010,5.SEL,5.VT,5.VS,5.VD,000111::::MAX.fmt
503 "max.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
505 // start-sanitize-vr5400
507 // end-sanitize-vr5400
510 int scale = get_scale (SD_, SEL);
511 for (i = 0; i < (8 >> scale); i++)
512 store_vr (SD_, scale, VD, i,
514 value_vr (SD_, scale, VS, i),
515 select_vr (SD_, SEL, VT, i)));
522 :function:::signed:Min:int scale, signed l, signed r
530 010010,5.SEL,5.VT,5.VS,5.VD,000110::::MIN.fmt
531 "min.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
533 // start-sanitize-vr5400
535 // end-sanitize-vr5400
538 int scale = get_scale (SD_, SEL);
539 for (i = 0; i < (8 >> scale); i++)
540 store_vr (SD_, scale, VD, i,
542 value_vr (SD_, scale, VS, i),
543 select_vr (SD_, SEL, VT, i)));
550 :function:::signed:Sign:int scale, signed l, signed r
558 /* watch for overflow of MIN_INT */
562 if ((r & 0xff) == 0x80)
567 if ((r & 0xffff) == 0x8000)
// MSGN: per-element operation on VS and the selected VT operand, written to
// VD.  Function code 000000 keeps this record distinct from MIN.fmt, which
// owns 000110 with the same operand layout.
578 010010,5.SEL,5.VT,5.VS,5.VD,000000::::MSGN.fmt
579 "msgn.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
583 int scale = get_scale (SD_, SEL);
585 /* only QH allowed */
/* CPU_ (not a bare `sd'), matching the illegal-instruction calls in get_scale */
586 semantic_illegal (CPU_, cia);
587 for (i = 0; i < (8 >> scale); i++)
588 store_vr (SD_, scale, VD, i,
590 value_vr (SD_, scale, VS, i),
591 select_vr (SD_, SEL, VT, i)));
598 010010,5.SEL,5.VT,5.VS,5.VD,110000::::MUL.fmt
599 "mul.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
601 // start-sanitize-vr5400
603 // end-sanitize-vr5400
606 int scale = get_scale (SD_, SEL);
607 for (i = 0; i < (8 >> scale); i++)
608 store_vr (SD_, scale, VD, i,
610 (value_vr (SD_, scale, VS, i)
611 * select_vr (SD_, SEL, VT, i))));
616 // Accumulate Vector Multiply
618 010010,5.SEL,5.VT,5.VS,00000,110011::::MULA.fmt
619 "mula.%s<SEL> v<VS>, %s<VT#SEL,VT>"
621 // start-sanitize-vr5400
623 // end-sanitize-vr5400
626 int scale = get_scale (SD_, SEL);
627 for (i = 0; i < (8 >> scale); i++)
628 store_acc (SD_, scale, i,
629 (value_acc (SD_, scale, i)
630 + ((signed64) value_vr (SD_, scale, VS, i)
631 * (signed64) select_vr (SD_, SEL, VT, i))));
636 // Load Vector Multiply into Accumulator.
638 010010,5.SEL,5.VT,5.VS,10000,110011::::MULL.fmt
639 "mull.%s<SEL> v<VS>, %s<VT#SEL,VT>"
641 // start-sanitize-vr5400
643 // end-sanitize-vr5400
646 int scale = get_scale (SD_, SEL);
647 for (i = 0; i < (8 >> scale); i++)
648 store_acc (SD_, scale, i,
649 ((signed64) value_vr (SD_, scale, VS, i)
650 * (signed64) select_vr (SD_, SEL, VT, i)));
655 // Subtract Vector Multiply from Accumulator
657 010010,5.SEL,5.VT,5.VS,00000,110010::::MULS.fmt
658 "muls.%s<SEL> v<VS>, %s<VT#SEL,VT>"
660 // start-sanitize-vr5400
662 // end-sanitize-vr5400
665 int scale = get_scale (SD_, SEL);
666 for (i = 0; i < (8 >> scale); i++)
667 store_acc (SD_, scale, i,
668 (value_acc (SD_, scale, i)
669 - ((signed64) value_vr (SD_, scale, VS, i)
670 * (signed64) select_vr (SD_, SEL, VT, i))));
675 // Load Negative Vector Multiply
677 010010,5.SEL,5.VT,5.VS,10000,110010::::MULSL.fmt
678 "mulsl.%s<SEL> v<VS>, %s<VT#SEL,VT>"
680 // start-sanitize-vr5400
682 // end-sanitize-vr5400
685 int scale = get_scale (SD_, SEL);
686 for (i = 0; i < (8 >> scale); i++)
687 store_acc (SD_, scale, i,
688 - ((signed64) value_vr (SD_, scale, VS, i)
689 * (signed64) select_vr (SD_, SEL, VT, i)));
696 010010,5.SEL,5.VT,5.VS,5.VD,001111::::NOR.fmt
697 "nor.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
699 // start-sanitize-vr5400
701 // end-sanitize-vr5400
704 int scale = get_scale (SD_, SEL);
705 for (i = 0; i < (8 >> scale); i++)
706 store_vr (SD_, scale, VD, i,
707 ~(value_vr (SD_, scale, VS, i)
708 | select_vr (SD_, SEL, VT, i)));
715 010010,5.SEL,5.VT,5.VS,5.VD,001110::::OR.fmt
716 "or.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
718 // start-sanitize-vr5400
720 // end-sanitize-vr5400
723 int scale = get_scale (SD_, SEL);
724 for (i = 0; i < (8 >> scale); i++)
725 store_vr (SD_, scale, VD, i,
726 (value_vr (SD_, scale, VS, i)
727 | select_vr (SD_, SEL, VT, i)));
732 // Select Vector Elements - False
734 010010,5.SEL,5.VT,5.VS,5.VD,000010::::PICKF.fmt
735 "pickf.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
737 // start-sanitize-vr5400
739 // end-sanitize-vr5400
742 int scale = get_scale (SD_, SEL);
743 for (i = 0; i < (8 >> scale); i++)
744 store_vr (SD_, scale, VD, i,
745 (value_cc (SD_, i) == 0
746 ? value_vr (SD_, scale, VS, i)
747 : select_vr (SD_, SEL, VT, i)));
752 // Select Vector Elements - True
754 010010,5.SEL,5.VT,5.VS,5.VD,000011::::PICKT.fmt
755 "pickt.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
757 // start-sanitize-vr5400
759 // end-sanitize-vr5400
762 int scale = get_scale (SD_, SEL);
763 for (i = 0; i < (8 >> scale); i++)
764 store_vr (SD_, scale, VD, i,
765 (value_cc (SD_, i) != 0
766 ? value_vr (SD_, scale, VS, i)
767 : select_vr (SD_, SEL, VT, i)));
772 // Scale, Round and Clamp Accumulator
// ScaleRoundClamp: shift VAL right by SHIFT using rounding mode RND
// (0 = towards zero, 1 = nearest with halfway away from zero, 2 = nearest
// with halfway to even), then clamp the result to the element size given
// by SCALE.
795 :function:::signed:ScaleRoundClamp:int scale, int rnd, signed val, signed shift
797 int halfway = (1 << (shift - 1));
/* NOTE(review): halfway is computed before the shift checks below; a SHIFT
   of 0 would evaluate 1 << -1 here -- confirm callers never pass 0.  */
798 /* must be positive */
801 /* too much shift? */
818 case 0: /* round towards zero */
820 case 1: /* nearest, halfway rounds away from zero */
826 case 2: /* nearest, halfway rounds to even! */
829 if (val & (halfway << 1))
832 val += (halfway - 1);
836 if (val & (halfway << 1))
839 val -= (halfway - 1);
867 /* normal signed clamp */
/* SD_ is the spelling every other helper call in this file uses */
868 val = Clamp (SD_, scale, val);
874 010010,5.SEL,5.VT,00000,5.VD,100,3.RND::::Rx.fmt
875 "r%s<RND>.%s<SEL> v<VD>, v<VT>"
877 // start-sanitize-vr5400
879 // end-sanitize-vr5400
882 int scale = get_scale (SD_, SEL);
883 for (i = 0; i < (8 >> scale); i++)
884 store_vr (SD_, scale, VD, i,
885 ScaleRoundClamp (SD_, scale, RND,
886 value_acc (SD_, scale, i),
887 select_vr (SD_, SEL, VT, i)));
892 // Vector Read Accumulator Low.
894 010010,0000,1.SEL,00000,00000,5.VD,111111::::RACL.fmt
897 // start-sanitize-vr5400
899 // end-sanitize-vr5400
902 int scale = get_scale (SD_, SEL);
903 for (i = 0; i < (8 >> scale); i++)
904 store_vr (SD_, scale, VD, i,
905 EXTRACTED (value_acc (SD_, scale, i),
912 // Vector Read Accumulator Middle.
914 010010,0100,1.SEL,00000,00000,5.VD,111111::::RACM.fmt
917 // start-sanitize-vr5400
919 // end-sanitize-vr5400
922 int scale = get_scale (SD_, SEL);
923 for (i = 0; i < (8 >> scale); i++)
924 store_vr (SD_, scale, VD, i,
925 EXTRACTED (value_acc (SD_, scale, i),
932 // Vector Read Accumulator High.
934 010010,1000,1.SEL,00000,00000,5.VD,111111::::RACH.fmt
937 // start-sanitize-vr5400
939 // end-sanitize-vr5400
942 int scale = get_scale (SD_, SEL);
943 for (i = 0; i < (8 >> scale); i++)
944 store_vr (SD_, scale, VD, i,
945 EXTRACTED (value_acc (SD_, scale, i),
952 // Vector Element Shuffle.
// Unpack byte elements 4..7 of VS into the four halfword elements of VD.
// Each byte is masked with 0xff so it is stored zero-extended.
// This encoding has no SEL field (those bits are the constant 0000,0), so
// the format is fixed: bytes are read at scale 0 and halfwords written at
// scale 1, and the mnemonic suffix is spelled out literally.
954 010010,0000,0,5.VT,5.VS,5.VD,011111::::SHFL.UPUH.fmt
955 "shfl.upuh.ob v<VD>, v<VS>, <VT>"
960 for (i = 0; i < 4; i++)
962 store_vr (SD_, 1, VD, i,
963 value_vr (SD_, 0, VS, i + 4) & 0xff);
// Unpack byte elements 0..3 of VS into the four halfword elements of VD.
// Each byte is masked with 0xff so it is stored zero-extended.
// This encoding has no SEL field (those bits are the constant 0001,0), so
// the mnemonic suffix is spelled out literally.
967 010010,0001,0,5.VT,5.VS,5.VD,011111::::SHFL.UPUL.fmt
968 "shfl.upul.ob v<VD>, v<VS>, <VT>"
972 for (i = 0; i < 4; i++)
974 store_vr (SD_, 1, VD, i,
975 value_vr (SD_, 0, VS, i) & 0xff);
// Unpack byte elements 4..7 of VS into the four halfword elements of VD,
// keeping the signed value returned by value_vr (no 0xff mask).
// Selector 0010 keeps this record distinct from SHFL.UPUH (0000); the
// shuffle selectors in this file run UPUH 0000, UPUL 0001, then PACH 0100.
// There is no SEL field in this encoding, so the format is fixed and the
// mnemonic suffix is spelled out literally.
979 010010,0010,0,5.VT,5.VS,5.VD,011111::::SHFL.UPSH.fmt
980 "shfl.upsh.ob v<VD>, v<VS>, <VT>"
985 for (i = 0; i < 4; i++)
987 store_vr (SD_, 1, VD, i,
988 value_vr (SD_, 0, VS, i + 4));
// Unpack byte elements 0..3 of VS into the four halfword elements of VD,
// keeping the signed value returned by value_vr (no 0xff mask).
// Selector 0011 keeps this record distinct from SHFL.UPUL (0001).
// There is no SEL field in this encoding, so the mnemonic suffix is
// spelled out literally.
992 010010,0011,0,5.VT,5.VS,5.VD,011111::::SHFL.UPSL.fmt
993 "shfl.upsl.ob v<VD>, v<VS>, <VT>"
997 for (i = 0; i < 4; i++)
999 store_vr (SD_, 1, VD, i,
1000 value_vr (SD_, 0, VS, i));
// Pack high: the odd-numbered elements of VT fill VD elements
// 0 .. (4 >> scale) - 1, and the odd-numbered elements of VS fill the
// elements from (4 >> scale) upwards.
1004 010010,0100,1.SEL,5.VT,5.VS,5.VD,011111::::SHFL.PACH.fmt
1005 "shfl.pach.%s<SEL> v<VD>, v<VS>, <VT>"
1007 // start-sanitize-vr5400
1009 // end-sanitize-vr5400
1012 int scale = get_scale (SD_, SEL);
1013 for (i = 0; i < (4 >> scale); i++)
1015 store_vr (SD_, scale, VD, i,
1016 value_vr (SD_, scale, VT, i * 2 + 1));
/* destination advances with i; a constant index would overwrite one element */
1017 store_vr (SD_, scale, VD, i + (4 >> scale),
1018 value_vr (SD_, scale, VS, i * 2 + 1));
// Pack low: the even-numbered elements of VT fill VD elements
// 0 .. (4 >> scale) - 1, and the even-numbered elements of VS fill the
// elements from (4 >> scale) upwards.
1022 010010,0101,1.SEL,5.VT,5.VS,5.VD,011111::::SHFL.PACL.fmt
1023 "shfl.pacl.%s<SEL> v<VD>, v<VS>, <VT>"
1025 // start-sanitize-vr5400
1027 // end-sanitize-vr5400
1030 int scale = get_scale (SD_, SEL);
1031 for (i = 0; i < (4 >> scale); i++)
1033 store_vr (SD_, scale, VD, i,
1034 value_vr (SD_, scale, VT, i * 2));
/* destination advances with i; a constant index would overwrite one element */
1035 store_vr (SD_, scale, VD, i + (4 >> scale),
1036 value_vr (SD_, scale, VS, i * 2));
1040 010010,0110,1.SEL,5.VT,5.VS,5.VD,011111::::SHFL.MIXH.fmt
1041 "shfl.mixh.%s<SEL> v<VD>, v<VS>, <VT>"
1043 // start-sanitize-vr5400
1045 // end-sanitize-vr5400
1048 int scale = get_scale (SD_, SEL);
1049 for (i = 0; i < (4 >> scale); i++)
1051 store_vr (SD_, scale, VD, i * 2,
1052 value_vr (SD_, scale, VT, i + (4 >> scale)));
1053 store_vr (SD_, scale, VD, i * 2 + 1,
1054 value_vr (SD_, scale, VS, i + (4 >> scale)));
1058 010010,0111,1.SEL,5.VT,5.VS,5.VD,011111::::SHFL.MIXL.fmt
1059 "shfl.mixl.%s<SEL> v<VD>, v<VS>, <VT>"
1061 // start-sanitize-vr5400
1063 // end-sanitize-vr5400
1066 int scale = get_scale (SD_, SEL);
1067 for (i = 0; i < (4 >> scale); i++)
1069 store_vr (SD_, scale, VD, i * 2,
1070 value_vr (SD_, scale, VT, i));
1071 store_vr (SD_, scale, VD, i * 2 + 1,
1072 value_vr (SD_, scale, VS, i));
1076 010010,100,01,5.VT,5.VS,5.VD,011111::::SHFL.BFLA.fmt
1077 "shfl.bfla.qh v<VD>, v<VS>, <VT>"
1080 store_vr (SD_, 1, VD, 0,
1081 value_vr (SD_, 1, VT, 1));
1082 store_vr (SD_, 1, VD, 1,
1083 value_vr (SD_, 1, VS, 0));
1084 store_vr (SD_, 1, VD, 2,
1085 value_vr (SD_, 1, VT, 3));
1086 store_vr (SD_, 1, VD, 3,
1087 value_vr (SD_, 1, VS, 2));
1090 010010,101,01,5.VT,5.VS,5.VD,011111::::SHFL.BFLB.fmt
1091 "shfl.bflb.qh v<VD>, v<VS>, <VT>"
1094 store_vr (SD_, 1, VD, 0,
1095 value_vr (SD_, 1, VT, 3));
1096 store_vr (SD_, 1, VD, 1,
1097 value_vr (SD_, 1, VS, 2));
1098 store_vr (SD_, 1, VD, 2,
1099 value_vr (SD_, 1, VT, 1));
1100 store_vr (SD_, 1, VD, 3,
1101 value_vr (SD_, 1, VS, 0));
// Replicate A (quad halfword): VD[0..1] = VT[2..3], VD[2..3] = VS[2..3].
// Selector 110 keeps this record distinct from SHFL.BFLB, which owns 101;
// the QH-only shuffles are numbered BFLA 100, BFLB 101, REPA 110, REPB 111.
1104 010010,110,01,5.VT,5.VS,5.VD,011111::::SHFL.REPA.fmt
1105 "shfl.repa.qh v<VD>, v<VS>, <VT>"
1108 store_vr (SD_, 1, VD, 0,
1109 value_vr (SD_, 1, VT, 2));
1110 store_vr (SD_, 1, VD, 1,
1111 value_vr (SD_, 1, VT, 3));
1112 store_vr (SD_, 1, VD, 2,
1113 value_vr (SD_, 1, VS, 2));
1114 store_vr (SD_, 1, VD, 3,
1115 value_vr (SD_, 1, VS, 3));
// Replicate B (quad halfword): VD[0..1] = VT[0..1], VD[2..3] = VS[0..1].
// Selector 111 keeps this record distinct from SHFL.BFLB, which owns 101;
// the QH-only shuffles are numbered BFLA 100, BFLB 101, REPA 110, REPB 111.
1118 010010,111,01,5.VT,5.VS,5.VD,011111::::SHFL.REPB.fmt
1119 "shfl.repb.qh v<VD>, v<VS>, <VT>"
1122 store_vr (SD_, 1, VD, 0,
1123 value_vr (SD_, 1, VT, 0));
1124 store_vr (SD_, 1, VD, 1,
1125 value_vr (SD_, 1, VT, 1));
1126 store_vr (SD_, 1, VD, 2,
1127 value_vr (SD_, 1, VS, 0));
1128 store_vr (SD_, 1, VD, 3,
1129 value_vr (SD_, 1, VS, 1));
1134 // Vector Shift Left Logical
1136 010010,5.SEL,5.VT,5.VS,5.VD,010000::::SLL.fmt
1137 "sll.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
1139 // start-sanitize-vr5400
1141 // end-sanitize-vr5400
1144 int scale = get_scale (SD_, SEL);
1145 int mask = (4 << scale) - 1;
1146 for (i = 0; i < (8 >> scale); i++)
1147 store_vr (SD_, scale, VD, i,
1148 (value_vr (SD_, scale, VS, i)
1149 << (select_vr (SD_, SEL, VT, i) & mask)));
1154 // Vector Shift Right Arithmetic
// Each element of VS is shifted right by the selected VT operand; the shift
// count is masked to (4 << scale) - 1, as in SLL and SRL.  value_vr returns
// a signed value, so >> operates on a signed operand.
1156 010010,5.SEL,5.VT,5.VS,5.VD,010011::::SRA.fmt
1157 "sra.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
1161 int scale = get_scale (SD_, SEL);
/* scale must be declared before mask, which is derived from it */
1162 int mask = (4 << scale) - 1;
1163 for (i = 0; i < (8 >> scale); i++)
1164 store_vr (SD_, scale, VD, i,
1165 (value_vr (SD_, scale, VS, i)
1166 >> (select_vr (SD_, SEL, VT, i) & mask)));
1171 // Vector Shift Right Logical.
1173 010010,5.SEL,5.VT,5.VS,5.VD,010010::::SRL.fmt
1174 "srl.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
1176 // start-sanitize-vr5400
1178 // end-sanitize-vr5400
1181 int scale = get_scale (SD_, SEL);
1182 int mask = (4 << scale) - 1;
1183 int zeros = (1 << (8 << scale)) - 1;
1184 for (i = 0; i < (8 >> scale); i++)
1185 store_vr (SD_, scale, VD, i,
1186 ((value_vr (SD_, scale, VS, i) & zeros)
1187 >> (select_vr (SD_, SEL, VT, i) & mask)));
1194 010010,5.SEL,5.VT,5.VS,5.VD,001010::::SUB.fmt
1195 "sub.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
1197 // start-sanitize-vr5400
1199 // end-sanitize-vr5400
1202 int scale = get_scale (SD_, SEL);
1203 for (i = 0; i < (8 >> scale); i++)
1204 store_vr (SD_, scale, VD, i,
1205 (value_vr (SD_, scale, VS, i)
1206 - select_vr (SD_, SEL, VT, i)));
1211 // Accumulate Vector Difference
// acc[i] = acc[i] + VS[i] - select(VT)[i] for every element of the selected
// format.  The encoding has no VD field (those bits are the constant
// 0,0000), so the disassembly prints only VS and the selected VT operand.
1213 010010,5.SEL,5.VT,5.VS,0,0000,110110::::SUBA.fmt
1214 "suba.%s<SEL> v<VS>, %s<VT#SEL,VT>"
1218 int scale = get_scale (SD_, SEL);
1219 for (i = 0; i < (8 >> scale); i++)
/* store_acc takes (scale, element, value); there is no register operand */
1220 store_acc (SD_, scale, i,
1221 (value_acc (SD_, scale, i)
1222 + (signed64) value_vr (SD_, scale, VS, i)
1223 - (signed64) select_vr (SD_, SEL, VT, i)));
1228 // Load Vector Difference
// acc[i] = VS[i] - select(VT)[i] for every element of the selected format;
// the previous accumulator contents are not read.  The encoding has no VD
// field (those bits are the constant 1,0000), so the disassembly prints
// only VS and the selected VT operand.
1230 010010,5.SEL,5.VT,5.VS,1,0000,110110::::SUBL.fmt
1231 "subl.%s<SEL> v<VS>, %s<VT#SEL,VT>"
1235 int scale = get_scale (SD_, SEL);
1236 for (i = 0; i < (8 >> scale); i++)
/* store_acc takes (scale, element, value); there is no register operand */
1237 store_acc (SD_, scale, i,
1238 ((signed64) value_vr (SD_, scale, VS, i)
1239 - (signed64) select_vr (SD_, SEL, VT, i)));
1244 // Write Accumulator High.
1246 010010,1000,1.SEL,00000,5.VS,00000,111110::::WACH.fmt
1247 "wach.%s<SEL> v<VS>"
1249 // start-sanitize-vr5400
1251 // end-sanitize-vr5400
1254 int scale = get_scale (SD_, SEL);
1255 for (i = 0; i < (8 >> scale); i++)
1256 store_acc (SD_, scale, i,
1257 (((signed64) value_vr (SD_, scale, VS, i) << (16 << scale))
1258 | MASKED (value_acc (SD_, scale, i), (16 << scale) - 1, 0)));
1263 // Vector Write Accumulator Low.
1265 010010,0000,1.SEL,5.VT,5.VS,00000,111110::::WACL.fmt
1266 "wacl.%s<SEL> v<VS>, <VT>"
1268 // start-sanitize-vr5400
1270 // end-sanitize-vr5400
1273 int scale = get_scale (SD_, SEL);
1274 for (i = 0; i < (8 >> scale); i++)
1275 store_acc (SD_, scale, i,
1276 (((signed64) value_vr (SD_, scale, VS, i) << (16 << scale))
1277 | MASKED (value_vr (SD_, scale, VT, i),
1278 (16 << scale) - 1, 0)));
1285 010010,5.SEL,5.VT,5.VS,5.VD,001101::::XOR.fmt
1286 "xor.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
1288 // start-sanitize-vr5400
1290 // end-sanitize-vr5400
1293 int scale = get_scale (SD_, SEL);
1294 for (i = 0; i < (8 >> scale); i++)
1295 store_vr (SD_, scale, VD, i,
1296 (value_vr (SD_, scale, VS, i)
1297 ^ select_vr (SD_, SEL, VT, i)));