+ /* confirm that FIFO has enough words in it */
+ if(num_operands > 0)
+ last_operand_word = pke_pcrel_operand(me, num_operands);
+ if(last_operand_word != NULL || num_operands == 0)
+ {
+ address_word vu_addr_base, vutrack_addr_base;
+ address_word vu_addr_max_size;
+ int vector_num_out, vector_num_in;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* don't check whether VU is idle */
+
+ /* compute VU address base */
+ if(me->pke_number == 0)
+ {
+ vu_addr_base = VU0_MEM1_WINDOW_START;
+ vu_addr_max_size = VU0_MEM1_SIZE;
+ vutrack_addr_base = VU0_MEM1_SRCADDR_START;
+ r = 0;
+ }
+ else
+ {
+ vu_addr_base = VU1_MEM1_WINDOW_START;
+ vu_addr_max_size = VU1_MEM1_SIZE;
+ vutrack_addr_base = VU1_MEM1_SRCADDR_START;
+ }
+
+ /* set NUM */
+ PKE_REG_MASK_SET(me, NUM, NUM, nummx);
+
+ /* transfer given number of vectors */
+ vector_num_out = 0; /* output vector number being processed */
+ vector_num_in = 0; /* argument vector number being processed */
+ do
+ {
+ quadword vu_old_data;
+ quadword vu_new_data;
+ quadword unpacked_data;
+ address_word vu_addr;
+ address_word vutrack_addr;
+ unsigned_4 source_addr = 0;
+ int i;
+ int next_num;
+
+ /* decrement NUM */
+ next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
+ PKE_REG_MASK_SET(me, NUM, NUM, next_num);
+
+ /* compute VU destination address, as bytes in R5900 memory */
+ if(cl >= wl)
+ {
+ /* map zero to max+1 */
+ vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) +
+ (vector_num_out / addrwl) * cl +
+ (vector_num_out % addrwl));
+ }
+ else
+ vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) +
+ vector_num_out);
+
+ /* handle "R" double-buffering bit */
+ if(r)
+ vu_addr += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS);
+
+ /* check for vu_addr overflow */
+ while(vu_addr >= vu_addr_base + vu_addr_max_size)
+ vu_addr -= vu_addr_max_size;
+
+ /* compute address of tracking table entry */
+ vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;
+
+ /* read old VU data word at address; reverse words if needed */
+ {
+ unsigned_16 vu_old_badwords;
+ ASSERT(sizeof(vu_old_badwords) == 16);
+ PKE_MEM_READ(me, vu_addr,
+ &vu_old_badwords, 16);
+ vu_old_data[0] = * A4_16(& vu_old_badwords, 3);
+ vu_old_data[1] = * A4_16(& vu_old_badwords, 2);
+ vu_old_data[2] = * A4_16(& vu_old_badwords, 1);
+ vu_old_data[3] = * A4_16(& vu_old_badwords, 0);
+ }
+
+ /* For cyclic unpack, next operand quadword may come from instruction stream
+ or be zero. */
+ if((cl < addrwl) &&
+ (vector_num_out % addrwl) >= cl)
+ {
+ /* clear operand - used only in an "indeterminate" state */
+ for(i = 0; i < 4; i++)
+ unpacked_data[i] = 0;
+ }
+ else
+ {
+ /* compute packed vector dimensions */
+ int vectorbits = 0, unitbits = 0;
+
+ if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
+ {
+ unitbits = (32 >> vl);
+ vectorbits = unitbits * (vn+1);
+ }
+ else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
+ {
+ unitbits = 5;
+ vectorbits = 16;
+ }
+ else /* illegal unpack variant */
+ {
+ /* should have been caught at top of function */
+ ASSERT(0);
+ }
+
+ /* loop over columns */
+ for(i=0; i<=vn; i++)
+ {
+ unsigned_4 operand;
+
+ /* offset in bits in current operand word */
+ int bitoffset =
+ (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */
+
+ /* last unit of V4_5 is only one bit wide */
+ if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
+ unitbits = 1;
+
+ /* confirm we're not reading more than we said we needed */
+ if(vector_num_in * vectorbits >= num_operands * 32)
+ {
+ /* this condition may be triggered by illegal
+ PKEcode / CYCLE combinations. */
+ pke_code_error(me, pkecode);
+ /* XXX: this case needs to be better understood,
+ and detected at a better time. */
+ return;
+ }
+
+ /* fetch bitfield operand */
+ operand = pke_pcrel_operand_bits(me, bitoffset, unitbits, & source_addr);
+
+ /* selectively sign-extend; not for V4_5 1-bit value */
+ if(usn || unitbits == 1)
+ unpacked_data[i] = operand;
+ else
+ unpacked_data[i] = SEXT32(operand, unitbits-1);
+ }
+
+ /* set remaining top words in vector */
+ for(i=vn+1; i<4; i++)
+ {
+ if(vn == 0) /* S_{32,16,8}: copy lowest element */
+ unpacked_data[i] = unpacked_data[0];
+ else
+ unpacked_data[i] = 0;
+ }
+
+ /* consumed a vector from the PKE instruction stream */
+ vector_num_in ++;
+ } /* unpack word from instruction operand */
+
+ /* process STMOD register for accumulation operations */
+ switch(PKE_REG_MASK_GET(me, MODE, MDE))
+ {
+ case PKE_MODE_ADDROW: /* add row registers to output data */
+ case PKE_MODE_ACCROW: /* same .. later conditionally accumulate */
+ for(i=0; i<4; i++)
+ /* exploit R0..R3 contiguity */
+ unpacked_data[i] += me->regs[PKE_REG_R0 + i][0];
+ break;
+
+ case PKE_MODE_INPUT: /* pass data through */
+ default: /* specified as undefined */
+ ;
+ }
+
+ /* compute replacement word */
+ if(m) /* use mask register? */
+ {
+ /* compute index into mask register for this word */
+ int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3);
+
+ for(i=0; i<4; i++) /* loop over columns */
+ {
+ int mask_op = PKE_MASKREG_GET(me, mask_index, i);
+ unsigned_4* masked_value = NULL;
+
+ switch(mask_op)
+ {
+ case PKE_MASKREG_INPUT:
+ masked_value = & unpacked_data[i];
+
+ /* conditionally accumulate */
+ if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW)
+ me->regs[PKE_REG_R0 + i][0] = unpacked_data[i];
+
+ break;
+
+ case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
+ masked_value = & me->regs[PKE_REG_R0 + i][0];
+ break;
+
+ case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
+ masked_value = & me->regs[PKE_REG_C0 + mask_index][0];
+ break;
+
+ case PKE_MASKREG_NOTHING:
+ /* "write inhibit" by re-copying old data */
+ masked_value = & vu_old_data[i];
+ break;
+
+ default:
+ ASSERT(0);
+ /* no other cases possible */
+ }
+
+ /* copy masked value for column */
+ vu_new_data[i] = *masked_value;
+ } /* loop over columns */
+ } /* mask */
+ else
+ {
+ /* no mask - just copy over entire unpacked quadword */
+ memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
+
+ /* conditionally store accumulated row results */
+ if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW)
+ for(i=0; i<4; i++)
+ me->regs[PKE_REG_R0 + i][0] = unpacked_data[i];
+ }
+
+ /* write new VU data word at address; reverse words if needed */
+ {
+ unsigned_16 vu_new_badwords;
+ * A4_16(& vu_new_badwords, 3) = vu_new_data[0];
+ * A4_16(& vu_new_badwords, 2) = vu_new_data[1];
+ * A4_16(& vu_new_badwords, 1) = vu_new_data[2];
+ * A4_16(& vu_new_badwords, 0) = vu_new_data[3];
+ ASSERT(sizeof(vu_new_badwords) == 16);
+ PKE_MEM_WRITE(me, vu_addr,
+ &vu_new_badwords, 16);
+ }
+
+ /* write tracking address */
+ ASSERT(sizeof(unsigned_4) == 4);
+ PKE_MEM_WRITE(me, vutrack_addr,
+ & source_addr,
+ 4);
+
+ /* next vector please */
+ vector_num_out ++;
+ } /* vector transfer loop */
+ while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);
+
+ /* confirm we've written as many vectors as told */
+ ASSERT(nummx == vector_num_out);
+
+ /* done */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ pke_pc_advance(me, 1 + num_operands);
+ } /* PKE FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
+ /* retry this instruction next clock */
+ }
+}
+
+
+void
+pke_code_error(struct pke_device* me, unsigned_4 pkecode)
+{
+ /* set ER1 flag in STAT register */
+ PKE_REG_MASK_SET(me, STAT, ER1, 1);
+
+ if(! PKE_REG_MASK_GET(me, ERR, ME1))
+ {
+ pke_begin_interrupt_stall(me);
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
+ }
+ else
+ {
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
+ }