1 /* Copyright (C) 1998, Cygnus Solutions */
12 #include "sim-assert.h"
15 #include "sky-gpuif.h"
16 #include "sky-device.h"
19 /* Internal function declarations */
/* Prototypes for the file-local helpers defined further down in this file. */
21 static int pke_io_read_buffer(device
*, void*, int, address_word
,
22 unsigned, sim_cpu
*, sim_cia
);
23 static int pke_io_write_buffer(device
*, const void*, int, address_word
,
24 unsigned, sim_cpu
*, sim_cia
);
25 static void pke_issue(SIM_DESC
, struct pke_device
*);
26 static void pke_pc_advance(struct pke_device
*, int num_words
);
27 static unsigned_4
* pke_pc_operand(struct pke_device
*, int operand_num
);
28 static unsigned_4
pke_pc_operand_bits(struct pke_device
*, int bit_offset
,
29 int bit_width
, unsigned_4
* sourceaddr
);
30 static struct fifo_quadword
* pke_pc_fifo(struct pke_device
*, int operand_num
,
31 unsigned_4
** operand
);
32 static void pke_attach(SIM_DESC sd
, struct pke_device
* me
);
/* Selector passed to pke_check_stall(): the VU, or one of the three
   GPUIF paths (path1: VU to GPUIF, path2: PKE to GPUIF,
   path3: DMA to GPUIF). */
33 enum pke_check_target
{ chk_vu
, chk_path1
, chk_path2
, chk_path3
};
34 static int pke_check_stall(struct pke_device
* me
, enum pke_check_target what
);
35 static void pke_flip_dbf(struct pke_device
* me
);
36 /* PKEcode handlers */
/* One handler per PKEcode command.  pke_issue() calls these with the
   device and the instruction word it fetched from the FIFO. */
37 static void pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
);
38 static void pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
);
39 static void pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
);
40 static void pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
);
41 static void pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
);
42 static void pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
);
43 static void pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
);
44 static void pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
);
45 static void pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
);
46 static void pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
);
47 static void pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
);
48 static void pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
);
49 static void pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
);
50 static void pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
);
51 static void pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
);
52 static void pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
);
53 static void pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
);
54 static void pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
);
55 static void pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
);
56 static void pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
);
57 static void pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
);
58 static void pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
);
/* Statically initialised state for the pke0 unit.  Its embedded device
   record carries the name "pke0" and the shared read/write buffer
   callbacks; the FIFO write buffer and FIFO fields start out empty.
   NOTE(review): the remaining initialisers are not visible in this view. */
64 struct pke_device pke0_device
=
66 { "pke0", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
69 {}, 0, /* FIFO write buffer */
70 NULL
, 0, 0, NULL
, /* FIFO */
/* Statically initialised state for the pke1 unit.  Same layout as
   pke0_device, but the embedded device record is named "pke1".
   NOTE(review): the remaining initialisers are not visible in this view. */
75 struct pke_device pke1_device
=
77 { "pke1", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
80 {}, 0, /* FIFO write buffer */
81 NULL
, 0, 0, NULL
, /* FIFO */
87 /* External functions */
90 /* Attach PKE addresses to main memory */
/* External entry point for PKE0: forwards SD to pke_attach() together
   with the pke0 device instance. */
93 pke0_attach(SIM_DESC sd
)
95 pke_attach(sd
, & pke0_device
);
/* External entry point for PKE1: forwards SD to pke_attach() together
   with the pke1 device instance. */
99 pke1_attach(SIM_DESC sd
)
101 pke_attach(sd
, & pke1_device
);
106 /* Issue a PKE instruction if possible */
/* External entry point for PKE0: forwards SD to pke_issue() together
   with the pke0 device instance. */
109 pke0_issue(SIM_DESC sd
)
111 pke_issue(sd
, & pke0_device
);
/* External entry point for PKE1: forwards SD to pke_issue() together
   with the pke1 device instance. */
115 pke1_issue(SIM_DESC sd
)
117 pke_issue(sd
, & pke1_device
);
122 /* Internal functions */
125 /* Attach PKE memory regions to simulator */
/* Hook one PKE unit into the simulator.  Four sim_core_attach() calls
   cover, in order: the unit's register window, its FIFO address, and the
   VU MEM0 and MEM1 tracking tables.  Each address is chosen by
   me->pke_number (0 selects the PKE0/VU0 constants, otherwise PKE1/VU1).
   Afterwards a FIFO trace file is opened if the environment asks for one.
   NOTE(review): the trailing arguments of each sim_core_attach() call are
   not visible in this view. */
128 pke_attach(SIM_DESC sd
, struct pke_device
* me
)
/* register window */
131 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
132 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
,
133 PKE_REGISTER_WINDOW_SIZE
/*nr_bytes*/,
/* FIFO port: exactly one quadword wide */
139 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
140 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
,
141 sizeof(quadword
) /*nr_bytes*/,
146 /* VU MEM0 tracking table */
/* attached length is half of the corresponding MEM0 size */
147 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
148 ((me
->pke_number
== 0) ? VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
),
149 ((me
->pke_number
== 0) ? VU0_MEM0_SIZE
: VU1_MEM0_SIZE
) / 2,
154 /* VU MEM1 tracking table */
/* attached length is a quarter of the corresponding MEM1 size */
155 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
156 ((me
->pke_number
== 0) ? VU0_MEM1_SRCADDR_START
: VU1_MEM1_SRCADDR_START
),
157 ((me
->pke_number
== 0) ? VU0_MEM1_SIZE
: VU1_MEM1_SIZE
) / 4,
163 /* attach to trace file if appropriate */
165 char trace_envvar
[80];
166 char* trace_filename
= NULL
;
/* the variable consulted is VIF0_TRACE_FILE or VIF1_TRACE_FILE */
167 sprintf(trace_envvar
, "VIF%d_TRACE_FILE", me
->pke_number
);
168 trace_filename
= getenv(trace_envvar
);
169 if(trace_filename
!= NULL
)
/* the trace file is opened for writing; an fopen failure is reported
   through perror() */
171 me
->fifo_trace_file
= fopen(trace_filename
, "w");
172 if(me
->fifo_trace_file
== NULL
)
173 perror("VIF FIFO trace error on fopen");
/* switch the trace stream to line buffering */
175 setvbuf(me
->fifo_trace_file
, NULL
, _IOLBF
, 0);
182 /* Handle a PKE read; return no. of bytes read */
/* Device read callback.  The address is classified as either falling in
   this unit's register window or on its FIFO port.
   - Register window: a four-word `result' is zeroed, word 0 is loaded
     from me->regs[reg_num][0] (converted with H2T_4) for readable
     registers, and nr_bytes are copied out starting at the byte offset
     of the access; otherwise zero bytes are returned.
   - FIFO port: not readable, so nr_bytes of zeroes are returned.
   NOTE(review): the remaining parameters, the register switch and the
   return statements are not visible in this view. */
185 pke_io_read_buffer(device
*me_
,
193 /* downcast to gather embedding pke_device struct */
194 struct pke_device
* me
= (struct pke_device
*) me_
;
196 /* find my address ranges */
197 address_word my_reg_start
=
198 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
199 address_word my_fifo_addr
=
200 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
202 /* enforce that an access does not span more than one quadword */
/* low/high are the quadword-truncated addresses of the first and last
   byte touched */
203 address_word low
= ADDR_TRUNC_QW(addr
);
204 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
208 /* classify address & handle */
209 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
/* register index: quadword-truncated window offset shifted right by 4 */
212 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
213 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
218 result
[0] = result
[1] = result
[2] = result
[3] = 0;
220 /* handle reads to individual registers; clear `readable' on error */
223 /* handle common case of register reading, side-effect free */
224 /* PKE1-only registers*/
230 if(me
->pke_number
== 0)
233 /* PKE0 & PKE1 common registers*/
/* only word 0 of the register is returned, in target byte order */
252 result
[0] = H2T_4(me
->regs
[reg_num
][0]);
255 /* handle common case of write-only registers */
261 ASSERT(0); /* test above should prevent this possibility */
264 /* perform transfer & return */
268 memcpy(dest
, ((unsigned_1
*) &result
) + reg_byte
, nr_bytes
);
273 /* return zero bits */
274 memset(dest
, 0, nr_bytes
);
280 else if(addr
>= my_fifo_addr
&&
281 addr
< my_fifo_addr
+ sizeof(quadword
))
285 /* FIFO is not readable: return a word of zeroes */
286 memset(dest
, 0, nr_bytes
);
295 /* Handle a PKE write; return no. of bytes written */
/* Device write callback.  The address is classified as either falling in
   this unit's register window or on its FIFO port.
   - Register window: the written bytes are placed at their byte offset in
     a zeroed four-word `input', word 0 is converted with T2H_4, and the
     register-specific actions below are applied (FBRST control bits, the
     low three ERR bits, the 16-bit MARK value plus clearing STAT.MRK).
   - FIFO port: bytes are accumulated in me->fifo_qw_in_progress, with one
     bit per byte tracked in me->fifo_qw_done.  Once all sizeof(quadword)
     bits are set, the quadword is appended to me->fifo (grown 20 entries
     at a time with realloc), tagged with its DMA source address, and
     STAT.FQC is set.
   NOTE(review): the remaining parameters, the register switch, several
   branches and the return statements are not visible in this view. */
298 pke_io_write_buffer(device
*me_
,
306 /* downcast to gather embedding pke_device struct */
307 struct pke_device
* me
= (struct pke_device
*) me_
;
309 /* find my address ranges */
310 address_word my_reg_start
=
311 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
312 address_word my_fifo_addr
=
313 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
315 /* enforce that an access does not span more than one quadword */
316 address_word low
= ADDR_TRUNC_QW(addr
);
317 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
321 /* classify address & handle */
322 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
325 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
326 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
331 input
[0] = input
[1] = input
[2] = input
[3] = 0;
333 /* write user-given bytes into input */
334 memcpy(((unsigned_1
*) &input
) + reg_byte
, src
, nr_bytes
);
336 /* make words host-endian */
337 input
[0] = T2H_4(input
[0]);
338 /* we may ignore other words */
340 /* handle writes to individual registers; clear `writeable' on error */
344 /* Order these tests from least to most overriding, in case
345 multiple bits are set. */
/* FBRST.STC: cancel stall state */
346 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_STC_B
, PKE_REG_FBRST_STC_E
))
348 /* clear a bunch of status bits */
349 PKE_REG_MASK_SET(me
, STAT
, PSS
, 0);
350 PKE_REG_MASK_SET(me
, STAT
, PFS
, 0);
351 PKE_REG_MASK_SET(me
, STAT
, PIS
, 0);
352 PKE_REG_MASK_SET(me
, STAT
, INT
, 0);
353 PKE_REG_MASK_SET(me
, STAT
, ER0
, 0);
354 PKE_REG_MASK_SET(me
, STAT
, ER1
, 0);
355 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
356 /* will allow resumption of possible stalled instruction */
/* FBRST.STP: only records a pending stop; pke_issue() turns it into
   STAT.PSS once the unit is idle */
358 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_STP_B
, PKE_REG_FBRST_STP_E
))
360 me
->flags
|= PKE_FLAG_PENDING_PSS
;
/* FBRST.FBK: sets STAT.PFS immediately */
362 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_FBK_B
, PKE_REG_FBRST_FBK_E
))
364 PKE_REG_MASK_SET(me
, STAT
, PFS
, 1);
/* FBRST.RST: full reset of FIFO position, registers and write buffer */
366 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_RST_B
, PKE_REG_FBRST_RST_E
))
368 /* clear FIFO by skipping to word after PC: also
369 prevents re-execution attempt of possible stalled
371 me
->fifo_num_elements
= me
->fifo_pc
;
372 /* clear registers, flag, other state */
373 memset(me
->regs
, 0, sizeof(me
->regs
));
374 me
->fifo_qw_done
= 0;
381 /* copy bottom three bits */
382 BIT_MASK_SET(me
->regs
[PKE_REG_ERR
][0], 0, 2, BIT_MASK_GET(input
[0], 0, 2));
386 /* copy bottom sixteen bits */
387 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(input
[0], 0, 15));
388 /* reset MRK bit in STAT */
389 PKE_REG_MASK_SET(me
, STAT
, MRK
, 0);
392 /* handle common case of read-only registers */
393 /* PKE1-only registers - not really necessary to handle separately */
399 if(me
->pke_number
== 0)
402 /* PKE0 & PKE1 common registers*/
404 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
424 ASSERT(0); /* test above should prevent this possibility */
437 else if(addr
>= my_fifo_addr
&&
438 addr
< my_fifo_addr
+ sizeof(quadword
))
441 struct fifo_quadword
* fqw
;
442 int fifo_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside fifo quadword */
443 unsigned_4 dma_tag_present
= 0;
446 /* collect potentially-partial quadword in write buffer; LE byte order */
447 memcpy(((unsigned_1
*)& me
->fifo_qw_in_progress
) + fifo_byte
, src
, nr_bytes
);
448 /* mark bytes written */
/* one bit of fifo_qw_done per byte of the quadword */
449 for(i
= fifo_byte
; i
< fifo_byte
+ nr_bytes
; i
++)
450 BIT_MASK_SET(me
->fifo_qw_done
, i
, i
, 1);
452 /* return if quadword not quite written yet */
453 if(BIT_MASK_GET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1) !=
454 BIT_MASK_BTW(0, sizeof(quadword
)-1))
457 /* all done - process quadword after clearing flag */
458 BIT_MASK_SET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1, 0);
460 /* ensure FIFO has enough elements */
461 if(me
->fifo_num_elements
== me
->fifo_buffer_size
)
/* grow the backing array by 20 entries; the realloc result is held in a
   temporary so the old pointer is not overwritten here */
464 int new_fifo_buffer_size
= me
->fifo_buffer_size
+ 20;
465 void* ptr
= realloc((void*) me
->fifo
, new_fifo_buffer_size
*sizeof(struct fifo_quadword
));
469 /* oops, cannot enlarge FIFO any more */
470 device_error(me_
, "Cannot enlarge FIFO buffer\n");
475 me
->fifo_buffer_size
= new_fifo_buffer_size
;
478 /* add new quadword at end of FIFO; store data in host-endian */
479 fqw
= & me
->fifo
[me
->fifo_num_elements
];
/* every word starts out unclassified */
480 fqw
->word_class
[0] = fqw
->word_class
[1] =
481 fqw
->word_class
[2] = fqw
->word_class
[3] = wc_unknown
;
482 fqw
->data
[0] = T2H_4(me
->fifo_qw_in_progress
[0]);
483 fqw
->data
[1] = T2H_4(me
->fifo_qw_in_progress
[1]);
484 fqw
->data
[2] = T2H_4(me
->fifo_qw_in_progress
[2]);
485 fqw
->data
[3] = T2H_4(me
->fifo_qw_in_progress
[3]);
486 ASSERT(sizeof(unsigned_4
) == 4);
/* record where the DMA channel for this unit fetched the quadword from */
487 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_MADR
: DMA_D1_MADR
),
488 & fqw
->source_address
, /* converted to host-endian */
/* NOTE(review): presumably fills dma_tag_present; the destination
   argument is not visible in this view */
490 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_PKTFLAG
: DMA_D1_PKTFLAG
),
496 /* lower two words are DMA tags */
497 fqw
->word_class
[0] = fqw
->word_class
[1] = wc_dma
;
500 me
->fifo_num_elements
++;
502 /* set FQC to "1" as FIFO is now not empty */
503 PKE_REG_MASK_SET(me
, STAT
, FQC
, 1);
515 /* Issue & swallow next PKE opcode if possible/available */
/* Run one issue attempt for the given PKE unit, in three numbered phases:
   1 - fetch: requires a FIFO quadword at or beyond fifo_pc; steps over any
       DMA tag with pke_pc_advance(me, 0), reads the instruction word at
       fifo[fifo_pc].data[qw_pc] and mirrors it into the CODE register.
   2 - go/no-go: promotes a pending stop to STAT.PSS when idle, and marks
       the unit PPS_STALL if any of PFS, PSS, ER0, ER1 or PIS is set.
   3 - decode: extracts the interrupt bit and command, handles the
       interrupt protocol, then dispatches to the matching pke_code_*
       handler.  OFFSET, BASE, MSKPATH3, FLUSH, FLUSHA and PKEMSCALF are
       only dispatched when me->pke_number == 1; anything unmatched ends
       in pke_code_error().
   NOTE(review): early returns and some branch lines are not visible in
   this view. */
518 pke_issue(SIM_DESC sd
, struct pke_device
* me
)
520 struct fifo_quadword
* fqw
;
522 unsigned_4 cmd
, intr
, num
;
525 /* 1 -- fetch PKE instruction */
527 /* confirm availability of new quadword of PKE instructions */
528 if(me
->fifo_num_elements
<= me
->fifo_pc
)
531 /* skip over DMA tag, if present */
532 pke_pc_advance(me
, 0);
534 /* "fetch" instruction quadword and word */
535 fqw
= & me
->fifo
[me
->fifo_pc
];
536 fw
= fqw
->data
[me
->qw_pc
];
538 /* store word in PKECODE register */
539 me
->regs
[PKE_REG_CODE
][0] = fw
;
542 /* 2 -- test go / no-go for PKE execution */
544 /* switch on STAT:PSS if PSS-pending and in idle state */
545 if((PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
) &&
546 (me
->flags
& PKE_FLAG_PENDING_PSS
) != 0)
548 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
549 PKE_REG_MASK_SET(me
, STAT
, PSS
, 1);
552 /* check for stall/halt control bits */
553 if(PKE_REG_MASK_GET(me
, STAT
, PFS
) ||
554 PKE_REG_MASK_GET(me
, STAT
, PSS
) || /* note special treatment below */
555 /* PEW bit not a reason to keep stalling - it's re-checked below */
556 /* PGW bit not a reason to keep stalling - it's re-checked below */
557 /* maskable stall controls: ER0, ER1, PIS */
558 PKE_REG_MASK_GET(me
, STAT
, ER0
) ||
559 PKE_REG_MASK_GET(me
, STAT
, ER1
) ||
560 PKE_REG_MASK_GET(me
, STAT
, PIS
))
562 /* (still) stalled */
563 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
564 /* try again next cycle */
569 /* 3 -- decode PKE instruction */
/* only an idle unit moves to DECODE; other PPS states are kept */
572 if(PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
)
573 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_DECODE
);
575 /* Extract relevant bits from PKEcode */
576 intr
= BIT_MASK_GET(fw
, PKE_OPCODE_I_B
, PKE_OPCODE_I_E
);
577 cmd
= BIT_MASK_GET(fw
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
579 /* handle interrupts */
582 /* are we resuming an interrupt-stalled instruction? */
583 if(me
->flags
& PKE_FLAG_INT_NOLOOP
)
585 /* clear loop-prevention flag */
586 me
->flags
&= ~PKE_FLAG_INT_NOLOOP
;
588 /* fall through to decode & execute */
589 /* The pke_code_* functions should not check the MSB in the
592 else /* new interrupt-flagged instruction */
594 /* XXX: send interrupt to 5900? */
596 /* set INT flag in STAT register */
597 PKE_REG_MASK_SET(me
, STAT
, INT
, 1);
598 /* set loop-prevention flag */
599 me
->flags
|= PKE_FLAG_INT_NOLOOP
;
601 /* set PIS if stall not masked */
602 if(!PKE_REG_MASK_GET(me
, ERR
, MII
))
603 PKE_REG_MASK_SET(me
, STAT
, PIS
, 1);
605 /* suspend this instruction unless it's PKEMARK */
606 if(!IS_PKE_CMD(cmd
, PKEMARK
))
608 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
613 ; /* fall through to decode & execute */
619 /* decode & execute */
/* first matching command wins; each handler receives the whole word */
620 if(IS_PKE_CMD(cmd
, PKENOP
))
621 pke_code_nop(me
, fw
);
622 else if(IS_PKE_CMD(cmd
, STCYCL
))
623 pke_code_stcycl(me
, fw
);
624 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, OFFSET
))
625 pke_code_offset(me
, fw
);
626 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, BASE
))
627 pke_code_base(me
, fw
);
628 else if(IS_PKE_CMD(cmd
, ITOP
))
629 pke_code_itop(me
, fw
);
630 else if(IS_PKE_CMD(cmd
, STMOD
))
631 pke_code_stmod(me
, fw
);
632 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, MSKPATH3
))
633 pke_code_mskpath3(me
, fw
);
634 else if(IS_PKE_CMD(cmd
, PKEMARK
))
635 pke_code_pkemark(me
, fw
);
636 else if(IS_PKE_CMD(cmd
, FLUSHE
))
637 pke_code_flushe(me
, fw
);
638 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSH
))
639 pke_code_flush(me
, fw
);
640 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSHA
))
641 pke_code_flusha(me
, fw
);
642 else if(IS_PKE_CMD(cmd
, PKEMSCAL
))
643 pke_code_pkemscal(me
, fw
);
644 else if(IS_PKE_CMD(cmd
, PKEMSCNT
))
645 pke_code_pkemscnt(me
, fw
);
646 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, PKEMSCALF
))
647 pke_code_pkemscalf(me
, fw
);
648 else if(IS_PKE_CMD(cmd
, STMASK
))
649 pke_code_stmask(me
, fw
);
650 else if(IS_PKE_CMD(cmd
, STROW
))
651 pke_code_strow(me
, fw
);
652 else if(IS_PKE_CMD(cmd
, STCOL
))
653 pke_code_stcol(me
, fw
);
654 else if(IS_PKE_CMD(cmd
, MPG
))
655 pke_code_mpg(me
, fw
);
656 else if(IS_PKE_CMD(cmd
, DIRECT
))
657 pke_code_direct(me
, fw
);
658 else if(IS_PKE_CMD(cmd
, DIRECTHL
))
659 pke_code_directhl(me
, fw
);
660 else if(IS_PKE_CMD(cmd
, UNPACK
))
661 pke_code_unpack(me
, fw
);
662 /* ... no other commands ... */
664 pke_code_error(me
, fw
);
669 /* advance the PC by given number of data words; update STAT/FQC
670 field; assume FIFO is filled enough; classify passed-over words;
671 write FIFO trace line */
/* Move the PKE program counter (fifo_pc / qw_pc) forward by num_words
   data words.  num_words must be non-negative; callers pass 0 to step
   over a DMA tag without consuming an instruction word.  Words classed
   wc_dma are skipped without being counted.  When a trace file is open,
   a record is written for each quadword left behind.  On exit STAT.FQC is
   cleared if the FIFO has been fully consumed; otherwise the word the PC
   lands on is classed wc_pkecode.
   NOTE(review): the loop header and the pointer-stepping statements are
   not visible in this view. */
674 pke_pc_advance(struct pke_device
* me
, int num_words
)
677 struct fifo_quadword
* fq
= NULL
;
678 ASSERT(num_words
>= 0);
680 /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */
684 fq
= & me
->fifo
[me
->fifo_pc
];
686 /* skip over DMA tag words if present in word 0 or 1 */
687 if(fq
->word_class
[me
->qw_pc
] == wc_dma
)
689 /* skip by going around loop an extra time */
693 /* nothing left to skip / no DMA tag here */
697 /* one word skipped */
700 /* point to next word */
707 /* trace the consumption of the FIFO quadword we just skipped over */
708 /* fq still points to it */
709 if(me
->fifo_trace_file
!= NULL
)
711 /* assert complete classification */
712 ASSERT(fq
->word_class
[3] != wc_unknown
);
713 ASSERT(fq
->word_class
[2] != wc_unknown
);
714 ASSERT(fq
->word_class
[1] != wc_unknown
);
715 ASSERT(fq
->word_class
[0] != wc_unknown
);
717 /* print trace record */
/* record layout: unit number, the four data words from word 3 down to
   word 0, the source address, then the four word-class values printed as
   characters in the same 3-to-0 order */
718 fprintf(me
->fifo_trace_file
,
719 "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
720 (me
->pke_number
== 0 ? 0 : 1),
721 (unsigned) fq
->data
[3], (unsigned) fq
->data
[2],
722 (unsigned) fq
->data
[1], (unsigned) fq
->data
[0],
723 (unsigned) fq
->source_address
,
724 fq
->word_class
[3], fq
->word_class
[2],
725 fq
->word_class
[1], fq
->word_class
[0]);
728 /* XXX: zap old entries in FIFO */
729 } /* next quadword */
732 /* clear FQC if FIFO is now empty */
733 if(me
->fifo_num_elements
== me
->fifo_pc
)
735 PKE_REG_MASK_SET(me
, STAT
, FQC
, 0);
737 else /* annotate the word where the PC lands as a PKEcode */
739 fq
= & me
->fifo
[me
->fifo_pc
];
740 ASSERT(fq
->word_class
[me
->qw_pc
] == wc_pkecode
||
741 fq
->word_class
[me
->qw_pc
] == wc_unknown
);
742 fq
->word_class
[me
->qw_pc
] = wc_pkecode
;
748 /* Return pointer to FIFO quadword containing given operand# in FIFO.
749 `operand_num' starts at 1. Return pointer to operand word in last
750 argument, if non-NULL. If FIFO is not full enough, return 0.
751 Signal an ER0 indication upon skipping a DMA tag. */
/* Locate operand number operand_num (counted from 1) relative to the
   current PC without moving the PC itself: the walk uses local copies of
   fifo_pc and qw_pc.  Words classed wc_dma are stepped over, and meeting
   one sets STAT.ER0 unless ERR.ME0 is set.  If `operand' is non-NULL it
   receives the address of the operand word, and that word is classed
   wc_pkedata.
   NOTE(review): the loop header, the underflow exit and the return
   statement are not visible in this view. */
753 struct fifo_quadword
*
754 pke_pc_fifo(struct pke_device
* me
, int operand_num
, unsigned_4
** operand
)
756 int num
= operand_num
;
757 int new_qw_pc
, new_fifo_pc
;
758 struct fifo_quadword
* fq
= NULL
;
762 /* snapshot current pointers */
763 new_fifo_pc
= me
->fifo_pc
;
764 new_qw_pc
= me
->qw_pc
;
766 /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */
770 /* one word skipped */
773 /* point to next word */
781 /* check for FIFO underflow */
782 if(me
->fifo_num_elements
== new_fifo_pc
)
788 /* skip over DMA tag words if present in word 0 or 1 */
789 fq
= & me
->fifo
[new_fifo_pc
];
790 if(fq
->word_class
[new_qw_pc
] == wc_dma
)
792 /* mismatch error! */
793 if(! PKE_REG_MASK_GET(me
, ERR
, ME0
))
795 PKE_REG_MASK_SET(me
, STAT
, ER0
, 1);
796 /* don't stall just yet -- finish this instruction */
797 /* the PPS_STALL state will be entered by pke_issue() next time */
799 /* skip by going around loop an extra time */
805 /* return pointer to operand word itself */
808 *operand
= & fq
->data
[new_qw_pc
];
810 /* annotate the word where the pseudo-PC lands as a PKE operand */
811 ASSERT(fq
->word_class
[new_qw_pc
] == wc_pkedata
||
812 fq
->word_class
[new_qw_pc
] == wc_unknown
);
813 fq
->word_class
[new_qw_pc
] = wc_pkedata
;
820 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
821 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
822 them as an error (ER0). */
/* Thin wrapper over pke_pc_fifo(): asks for operand number operand_num
   and keeps only the operand-word pointer it reports.  When pke_pc_fifo()
   returns NULL, the operand pointer is asserted to still be NULL.
   NOTE(review): the return statement is not visible in this view. */
825 pke_pc_operand(struct pke_device
* me
, int operand_num
)
827 unsigned_4
* operand
= NULL
;
828 struct fifo_quadword
* fifo_operand
;
830 fifo_operand
= pke_pc_fifo(me
, operand_num
, & operand
);
832 if(fifo_operand
== NULL
)
833 ASSERT(operand
== NULL
); /* pke_pc_fifo() ought to leave it untouched */
839 /* Return a bit-field extract of given operand# in FIFO, and its
840 source-addr. `bit_offset' starts at 0, referring to LSB after PKE
841 instruction word. Width must be >0, <=32. Assume FIFO is full
842 enough. Skip over DMA tags, but mark them as an error (ER0). */
/* Extract a bit-field from the operand stream that follows the current
   instruction.  bit_offset is split into a word index (offset / 32) and a
   bit position (offset % 32); operand word index + 1 is fetched through
   pke_pc_fifo() and must exist (asserted).  The field covers bits
   bitnumber .. bitnumber + bit_width - 1 of that single word.  The
   quadword's source_address is stored through source_addr.
   NOTE(review): only one operand word is fetched, so a field straddling a
   32-bit word boundary is not handled here -- confirm callers never
   request one.  The declaration of `value' and the return statement are
   not visible in this view. */
845 pke_pc_operand_bits(struct pke_device
* me
, int bit_offset
, int bit_width
, unsigned_4
* source_addr
)
847 unsigned_4
* word
= NULL
;
849 struct fifo_quadword
* fifo_operand
;
850 int wordnumber
, bitnumber
;
852 wordnumber
= bit_offset
/32;
853 bitnumber
= bit_offset
%32;
855 /* find operand word with bitfield */
/* operand numbers are 1-based, hence the + 1 */
856 fifo_operand
= pke_pc_fifo(me
, wordnumber
+ 1, &word
);
857 ASSERT(word
!= NULL
);
859 /* extract bitfield from word */
860 value
= BIT_MASK_GET(*word
, bitnumber
, bitnumber
+ bit_width
- 1);
862 /* extract source addr from fifo word */
863 *source_addr
= fifo_operand
->source_address
;
870 /* check for stall conditions on indicated devices (path* only on
871 PKE1), do not change status; return 0 iff no stall */
/* Query whether the unit selected by `what' is currently busy.  Two
   status words are read through PKE_MEM_READ (from GIF_REG_STAT and
   COP2_REG_STAT_ADDR).  The VU test takes the VBS0 field of the COP2
   status for PKE0 and the VBS1 field for PKE1.  The path1/path2/path3
   tests compare the APATH field of the GPUIF status with 1, 2 and 3
   respectively.
   NOTE(review): the destination arguments of the reads, the first branch
   header, the branch bodies and the return statement are not visible in
   this view. */
873 pke_check_stall(struct pke_device
* me
, enum pke_check_target what
)
876 unsigned_4 cop2_stat
, gpuif_stat
;
878 /* read status words */
879 ASSERT(sizeof(unsigned_4
) == 4);
880 PKE_MEM_READ(me
, (GIF_REG_STAT
),
883 PKE_MEM_READ(me
, (COP2_REG_STAT_ADDR
),
/* each PKE looks at the busy field of its own VU */
890 if(me
->pke_number
== 0)
891 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS0_B
, COP2_REG_STAT_VBS0_E
);
892 else /* if(me->pke_number == 1) */
893 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS1_B
, COP2_REG_STAT_VBS1_E
);
895 else if(what
== chk_path1
) /* VU -> GPUIF */
897 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 1)
900 else if(what
== chk_path2
) /* PKE -> GPUIF */
902 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 2)
905 else if(what
== chk_path3
) /* DMA -> GPUIF */
907 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 3)
916 /* any stall reasons? */
921 /* PKE1 only: flip the DBF bit; recompute TOPS, TOP */
/* Swap the double-buffer flag and refresh the buffer pointers, in this
   order: TOP takes the current TOPS value; the DF bit is inverted and
   mirrored into both DBF.DF and STAT.DBF; TOPS becomes BASE plus OFFSET
   when the new flag is 1, or just BASE when it is 0. */
923 pke_flip_dbf(struct pke_device
* me
)
926 /* compute new TOP */
927 PKE_REG_MASK_SET(me
, TOP
, TOP
,
928 PKE_REG_MASK_GET(me
, TOPS
, TOPS
));
930 newdf
= PKE_REG_MASK_GET(me
, DBF
, DF
) ? 0 : 1;
931 PKE_REG_MASK_SET(me
, DBF
, DF
, newdf
);
932 PKE_REG_MASK_SET(me
, STAT
, DBF
, newdf
);
933 /* compute new TOPS */
934 PKE_REG_MASK_SET(me
, TOPS
, TOPS
,
935 (PKE_REG_MASK_GET(me
, BASE
, BASE
) +
936 newdf
* PKE_REG_MASK_GET(me
, OFST
, OFFSET
)));
938 /* this is equivalent to last word from okadaa (98-02-25):
940 2) TOPS=BASE + !DBF*OFFSET
946 /* PKEcode handler functions -- responsible for checking and
947 confirming old stall conditions, executing pkecode, updating PC and
948 status registers -- may assume being run on correct PKE unit */
/* PKENOP: no register effect.  Steps the PC past the one-word
   instruction and returns the unit to PPS_IDLE. */
951 pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
)
954 pke_pc_advance(me
, 1);
955 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* STCYCL: load the CYCLE register from the instruction's immediate
   field -- bits 8..15 go to CYCLE.WL and bits 0..7 to CYCLE.CL -- then
   step past the one-word instruction and return to PPS_IDLE. */
960 pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
)
962 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
964 /* copy immediate value into CYCLE reg */
965 PKE_REG_MASK_SET(me
, CYCLE
, WL
, BIT_MASK_GET(imm
, 8, 15));
966 PKE_REG_MASK_SET(me
, CYCLE
, CL
, BIT_MASK_GET(imm
, 0, 7));
968 pke_pc_advance(me
, 1);
969 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* OFFSET: load OFST.OFFSET from the low ten bits of the immediate, clear
   the double-buffer flag in both DBF.DF and STAT.DBF, and reset TOPS to
   the current BASE.  Then step past the one-word instruction and return
   to PPS_IDLE.  pke_issue() only dispatches this on PKE1. */
974 pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
)
976 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
978 /* copy 10 bits to OFFSET field */
979 PKE_REG_MASK_SET(me
, OFST
, OFFSET
, BIT_MASK_GET(imm
, 0, 9));
981 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
982 /* clear other DBF bit */
983 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
984 /* set TOPS = BASE */
985 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
987 pke_pc_advance(me
, 1);
988 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* BASE: load BASE.BASE from the low ten bits of the immediate, then step
   past the one-word instruction and return to PPS_IDLE.  pke_issue()
   only dispatches this on PKE1. */
993 pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
)
995 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
997 /* copy 10 bits to BASE field */
998 PKE_REG_MASK_SET(me
, BASE
, BASE
, BIT_MASK_GET(imm
, 0, 9));
1000 pke_pc_advance(me
, 1);
1001 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* ITOP: load ITOPS.ITOPS from the low ten bits of the immediate, then
   step past the one-word instruction and return to PPS_IDLE.  The value
   is copied on to the ITOP register later by the PKEMSCAL-family
   handlers. */
1006 pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
)
1008 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1010 /* copy 10 bits to ITOPS field */
1011 PKE_REG_MASK_SET(me
, ITOPS
, ITOPS
, BIT_MASK_GET(imm
, 0, 9));
1013 pke_pc_advance(me
, 1);
1014 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* STMOD: load MODE.MDE from the low bits of the immediate, then step
   past the one-word instruction and return to PPS_IDLE.
   NOTE(review): the comment below says 2 bits, but the extract range is
   0..2.  Other calls in this file use an inclusive range (0..9 for ten
   bits, 0..15 for sixteen), which would make this three bits -- confirm
   against the width of the MDE field. */
1019 pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
)
1021 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1023 /* copy 2 bits to MODE register */
1024 PKE_REG_MASK_SET(me
, MODE
, MDE
, BIT_MASK_GET(imm
, 0, 2));
1026 pke_pc_advance(me
, 1);
1027 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* MSKPATH3: translate the MSKPATH3 bit of the immediate into a GIF mode
   word (GIF_REG_MODE_M3R_MASK when the bit is set) and write four bytes
   of it to GIF_REG_MODE.  Then step past the one-word instruction and
   return to PPS_IDLE.  pke_issue() only dispatches this on PKE1.
   NOTE(review): the assignment used when the bit is clear is not visible
   in this view. */
1032 pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
)
1034 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1035 unsigned_4 gif_mode
;
1037 /* set appropriate bit */
1038 if(BIT_MASK_GET(imm
, PKE_REG_MSKPATH3_B
, PKE_REG_MSKPATH3_E
) != 0)
1039 gif_mode
= GIF_REG_MODE_M3R_MASK
;
1043 /* write register; patrickm code will look at M3R bit only */
1044 PKE_MEM_WRITE(me
, GIF_REG_MODE
, & gif_mode
, 4);
1047 pke_pc_advance(me
, 1);
1048 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* PKEMARK: load MARK.MARK from the sixteen-bit immediate and raise
   STAT.MRK, then step past the one-word instruction and return to
   PPS_IDLE.  (A register write to MARK clears STAT.MRK again; see
   pke_io_write_buffer.) */
1053 pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
)
1055 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1056 /* copy 16 bits to MARK register */
1057 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(imm
, 0, 15));
1058 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1059 PKE_REG_MASK_SET(me
, STAT
, MRK
, 1);
1061 pke_pc_advance(me
, 1);
1062 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* FLUSHE: wait for the VU.  While pke_check_stall(me, chk_vu) reports a
   stall, STAT.PEW is set and the unit parks in PPS_STALL without moving
   the PC, so the instruction is retried.  The second statement group
   clears PEW, returns to PPS_IDLE and steps past the one-word
   instruction.
   NOTE(review): the else line separating the two groups is not visible
   in this view. */
1067 pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
)
1069 /* compute next PEW bit */
1070 if(pke_check_stall(me
, chk_vu
))
1073 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1074 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1075 /* try again next cycle */
1080 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1081 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1082 pke_pc_advance(me
, 1);
/* FLUSH: wait for the VU and for GPUIF paths 1 and 2.  STAT.PEW tracks
   the VU check and STAT.PGW tracks the combined path1/path2 check.  The
   unit then either parks in PPS_WAIT for a retry, or returns to PPS_IDLE
   and steps past the one-word instruction.  pke_issue() only dispatches
   this on PKE1.
   NOTE(review): the updates to something_busy and the branch that tests
   it are not visible in this view. */
1088 pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
)
1090 int something_busy
= 0;
1092 /* compute next PEW, PGW bits */
1093 if(pke_check_stall(me
, chk_vu
))
1096 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1099 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1102 if(pke_check_stall(me
, chk_path1
) ||
1103 pke_check_stall(me
, chk_path2
))
1106 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1109 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1114 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1115 /* try again next cycle */
1120 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1121 pke_pc_advance(me
, 1);
/* FLUSHA: like FLUSH, but the GPUIF check also includes path 3.
   STAT.PEW tracks the VU check and STAT.PGW tracks the combined
   path1/path2/path3 check.  The unit then either parks in PPS_WAIT for a
   retry, or returns to PPS_IDLE and steps past the one-word instruction.
   pke_issue() only dispatches this on PKE1.
   NOTE(review): the updates to something_busy and the branch that tests
   it are not visible in this view. */
1127 pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
)
1129 int something_busy
= 0;
1131 /* compute next PEW, PGW bits */
1132 if(pke_check_stall(me
, chk_vu
))
1135 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1138 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1141 if(pke_check_stall(me
, chk_path1
) ||
1142 pke_check_stall(me
, chk_path2
) ||
1143 pke_check_stall(me
, chk_path3
))
1146 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1149 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1153 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1154 /* try again next cycle */
1159 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1160 pke_pc_advance(me
, 1);
/* PKEMSCAL: start VU execution at the address given in the immediate.
   While the VU check reports a stall, STAT.PEW is set and the unit parks
   in PPS_STALL for a retry.  Otherwise PEW is cleared, the new VU PC is
   taken from the low sixteen bits of the immediate and written to
   VU0_CIA or VU1_CIA (chosen by me->pke_number), ITOPS is copied to
   ITOP, and the unit returns to PPS_IDLE and steps past the one-word
   instruction.
   NOTE(review): the vu_pc declaration, the statement guarded by the
   pke_number == 1 test and the tail of the PKE_MEM_WRITE call are not
   visible in this view. */
1166 pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
)
1168 /* compute next PEW bit */
1169 if(pke_check_stall(me
, chk_vu
))
1172 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1173 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1174 /* try again next cycle */
1179 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1182 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1184 /* flip DBF on PKE1 */
1185 if(me
->pke_number
== 1)
1188 /* compute new PC for VU (host byte-order) */
/* NOTE(review): the value is computed on the host and then passed through
   T2H_4 before being written out -- confirm the conversion direction */
1189 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1190 vu_pc
= T2H_4(vu_pc
);
1192 /* write new PC; callback function gets VU running */
1193 ASSERT(sizeof(unsigned_4
) == 4);
1194 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1198 /* copy ITOPS field to ITOP */
1199 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1202 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1203 pke_pc_advance(me
, 1);
/* PKEMSCNT: restart VU execution without supplying a new address.  While
   the VU check reports a stall, STAT.PEW is set and the unit parks in
   PPS_STALL for a retry.  Otherwise PEW is cleared, the VU's CIA
   location (VU0_CIA or VU1_CIA, chosen by me->pke_number) is read and
   then written again, ITOPS is copied to ITOP, and the unit returns to
   PPS_IDLE and steps past the one-word instruction.
   NOTE(review): the statement guarded by the pke_number == 1 test and
   the tails of the PKE_MEM_READ / PKE_MEM_WRITE calls are not visible in
   this view. */
1210 pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
)
1212 /* compute next PEW bit */
1213 if(pke_check_stall(me
, chk_vu
))
1216 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1217 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1218 /* try again next cycle */
1225 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1227 /* flip DBF on PKE1 */
1228 if(me
->pke_number
== 1)
1232 ASSERT(sizeof(unsigned_4
) == 4);
1233 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1237 /* rewrite new PC; callback function gets VU running */
1238 ASSERT(sizeof(unsigned_4
) == 4);
1239 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1243 /* copy ITOPS field to ITOP */
1244 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1247 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1248 pke_pc_advance(me
, 1);
/* PKEMSCALF: like PKEMSCAL, but the wait also covers GPUIF paths 1 to 3.
   STAT.PEW tracks the VU check and STAT.PGW tracks the combined path
   check; a busy result parks the unit in PPS_WAIT for a retry.  Otherwise
   the new VU PC is taken from the low sixteen bits of the immediate and
   written to VU0_CIA or VU1_CIA, ITOPS is copied to ITOP, and the unit
   returns to PPS_IDLE and steps past the one-word instruction.
   pke_issue() only dispatches this on PKE1.
   NOTE(review): the updates to something_busy, the branch that tests it,
   the vu_pc declaration and the tail of the PKE_MEM_WRITE call are not
   visible in this view. */
1254 pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
)
1256 int something_busy
= 0;
1258 /* compute next PEW, PGW bits */
1259 if(pke_check_stall(me
, chk_vu
))
1262 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1265 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1268 if(pke_check_stall(me
, chk_path1
) ||
1269 pke_check_stall(me
, chk_path2
) ||
1270 pke_check_stall(me
, chk_path3
))
1273 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1276 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1281 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1282 /* try again next cycle */
1287 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1289 /* flip DBF on PKE1 */
1290 if(me
->pke_number
== 1)
1293 /* compute new PC for VU (host byte-order) */
/* NOTE(review): the value is computed on the host and then passed through
   T2H_4 before being written out -- confirm the conversion direction */
1294 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1295 vu_pc
= T2H_4(vu_pc
);
1297 /* rewrite new PC; callback function gets VU running */
1298 ASSERT(sizeof(unsigned_4
) == 4);
1299 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1303 /* copy ITOPS field to ITOP */
1304 PKE_REG_MASK_SET(me
, ITOP
, ITOP
, PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
1307 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1308 pke_pc_advance(me
, 1);
/* STMASK: load the MASK register from the single operand word that
   follows the instruction.  With the operand available, the unit passes
   through PPS_XFER with NUM set to 1, copies the operand into MASK.MASK,
   resets NUM to 0, returns to PPS_IDLE and steps the PC forward two words
   (instruction plus operand).  The last statement group parks the unit in
   PPS_WAIT so the instruction is retried.
   NOTE(review): the declaration of `mask' and the test that selects
   between the two groups are not visible in this view. */
1314 pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
)
1318 /* check that FIFO has one more word for STMASK operand */
1319 mask
= pke_pc_operand(me
, 1);
1322 /* "transferring" operand */
1323 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1326 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1328 /* fill the register */
1329 PKE_REG_MASK_SET(me
, MASK
, MASK
, *mask
);
1332 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1335 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1336 pke_pc_advance(me
, 2);
1340 /* need to wait for another word */
1341 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1342 /* try again next cycle */
/* STROW: load the four row registers R0..R3 from the four operand words
   that follow the instruction.  Availability is probed by asking for
   operand 4.  With the operands present, the unit passes through
   PPS_XFER with NUM set to 1, copies operands 1..4 into word 0 of
   R0..R3, resets NUM to 0, returns to PPS_IDLE and steps the PC forward
   five words (instruction plus four operands).  The last statement group
   parks the unit in PPS_WAIT so the instruction is retried.
   NOTE(review): the test on last_op that selects between the two groups
   is not visible in this view. */
1348 pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
)
1350 /* check that FIFO has four more words for STROW operand */
1351 unsigned_4
* last_op
;
1353 last_op
= pke_pc_operand(me
, 4);
1356 /* "transferring" operand */
1357 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1360 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1362 /* copy ROW registers: must all exist if 4th operand exists */
1363 me
->regs
[PKE_REG_R0
][0] = * pke_pc_operand(me
, 1);
1364 me
->regs
[PKE_REG_R1
][0] = * pke_pc_operand(me
, 2);
1365 me
->regs
[PKE_REG_R2
][0] = * pke_pc_operand(me
, 3);
1366 me
->regs
[PKE_REG_R3
][0] = * pke_pc_operand(me
, 4);
1369 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1372 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1373 pke_pc_advance(me
, 5);
1377 /* need to wait for another word */
1378 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1379 /* try again next cycle */
/* PKEcode handler for STCOL.
   Probes for operand word 4 with pke_pc_operand; when it is present the
   four operand words 1..4 are copied into element [0] of the register
   entries PKE_REG_C0..PKE_REG_C3, in that order, and the PKE program
   counter is advanced by five words.  STAT.PPS is set to XFER for the copy
   and to IDLE afterwards; NUM is set to 1 before the copy and to 0 after it.
   The final STAT.PPS = WAIT assignment belongs to the path taken when the
   operand words have not all arrived.
   NOTE(review): the braces and the if/else that choose between the two
   paths are not present in this excerpt; presumably the test is
   last_op != NULL -- confirm against the complete file.  */
1385 pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
)
1387 /* check that FIFO has four more words for STCOL operand */
1388 unsigned_4
* last_op
;
1390 last_op
= pke_pc_operand(me
, 4);
1393 /* "transferring" operand */
1394 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
/* NUM is set to 1 before the operand words are copied */
1397 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1399 /* copy COL registers: must all exist if 4th operand exists */
/* the results of these four pke_pc_operand calls are dereferenced
   without a NULL check, relying on the probe of operand 4 above */
1400 me
->regs
[PKE_REG_C0
][0] = * pke_pc_operand(me
, 1);
1401 me
->regs
[PKE_REG_C1
][0] = * pke_pc_operand(me
, 2);
1402 me
->regs
[PKE_REG_C2
][0] = * pke_pc_operand(me
, 3);
1403 me
->regs
[PKE_REG_C3
][0] = * pke_pc_operand(me
, 4);
/* operands copied: NUM goes back to 0 */
1406 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
/* finished: mark the unit idle and advance five words (presumably the
   PKEcode word plus its four operand words) */
1409 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1410 pke_pc_advance(me
, 5);
1414 /* need to wait for another word */
1415 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1416 /* try again next cycle */
/* PKEcode handler for MPG: copies VU instruction word-pairs from the
   PKE instruction stream into VU instruction memory.

   Steps visible in this excerpt:
   - NUM and IMM are extracted from PKECODE; a NUM of 0 is treated as 0x100.
   - If me->qw_pc is neither 1 nor 3 the PKEcode is handed to pke_code_error.
   - pke_pc_operand(me, num*2) probes for the last operand word.  If it is
     missing, STAT.PPS is set to WAIT and the instruction is retried.
   - pke_check_stall(me, chk_vu) implements the implied FLUSHE; when it
     reports a stall STAT.PPS is set to STALL and the instruction is retried.
   - Otherwise STAT.PPS is set to XFER, the NUM register is loaded with num
     and one word-pair is written per loop pass, together with one
     tracking-table entry taken from the source_address of the FIFO quadword.
   - After the loop NUM is asserted to be 0, STAT.PPS is set to IDLE and the
     PC is advanced by 1 + num*2 words.

   NOTE(review): braces, else lines, the declarations of i and next_num and
   the trailing arguments of the PKE_MEM_WRITE calls are not present in this
   excerpt; confirm the exact structure against the complete file.  */
1422 pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
)
1424 unsigned_4
* last_mpg_word
;
1425 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1426 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1428 /* assert 64-bit alignment of MPG operand */
/* presumably qw_pc is the word index of the PKEcode inside its quadword,
   so that 1 or 3 puts the following operand on a 64-bit boundary --
   TODO confirm */
1429 if(me
->qw_pc
!= 3 && me
->qw_pc
!= 1)
1430 return pke_code_error(me
, pkecode
);
1432 /* map zero to max+1 */
1433 if(num
==0) num
=0x100;
1435 /* check that FIFO has a few more words for MPG operand */
1436 last_mpg_word
= pke_pc_operand(me
, num
*2); /* num: number of 64-bit words */
1437 if(last_mpg_word
!= NULL
)
1439 /* perform implied FLUSHE */
1440 if(pke_check_stall(me
, chk_vu
))
1443 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1444 /* retry this instruction next clock */
1451 /* "transferring" operand */
1452 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
/* load the NUM register with the number of word-pairs to move */
1455 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
);
1457 /* transfer VU instructions, one word-pair per iteration */
1458 for(i
=0; i
<num
; i
++)
1460 address_word vu_addr_base
, vu_addr
;
1461 address_word vutrack_addr_base
, vutrack_addr
;
1462 address_word vu_addr_max_size
;
1463 unsigned_4 vu_lower_opcode
, vu_upper_opcode
;
1464 unsigned_4
* operand
;
1465 struct fifo_quadword
* fq
;
/* count this word-pair down in the NUM register */
1469 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1470 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1472 /* imm: in 64-bit units for MPG instruction */
1473 /* VU*_MEM0 : instruction memory */
/* window start, window size and tracking-table start all depend on
   whether this is PKE0 or PKE1 */
1474 vu_addr_base
= (me
->pke_number
== 0) ?
1475 VU0_MEM0_WINDOW_START
: VU1_MEM0_WINDOW_START
;
1476 vu_addr_max_size
= (me
->pke_number
== 0) ?
1477 VU0_MEM0_SIZE
: VU1_MEM0_SIZE
;
1478 vutrack_addr_base
= (me
->pke_number
== 0) ?
1479 VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
;
1481 /* compute VU address for this word-pair */
1482 vu_addr
= vu_addr_base
+ (imm
+ i
) * 8;
1483 /* check for vu_addr overflow */
/* addresses past the end of the window wrap around to its start */
1484 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1485 vu_addr
-= vu_addr_max_size
;
1487 /* compute VU tracking address */
/* byte offset into the window divided by 2: presumably one 4-byte
   tracking entry per 8-byte word-pair -- TODO confirm */
1488 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 2;
1490 /* Fetch operand words; assume they are already little-endian for VU imem */
/* pke_pc_fifo also yields the FIFO quadword holding the first word; its
   source_address is written to the tracking table below */
1491 fq
= pke_pc_fifo(me
, i
*2 + 1, & operand
);
1492 vu_lower_opcode
= *operand
;
1493 vu_upper_opcode
= *pke_pc_operand(me
, i
*2 + 2);
1495 /* write data into VU memory */
1496 /* lower (scalar) opcode comes in first word ; macro performs H2T! */
/* NOTE(review): the remaining arguments of this and the next two
   PKE_MEM_WRITE calls are missing from this excerpt */
1497 PKE_MEM_WRITE(me
, vu_addr
,
1500 /* upper (vector) opcode comes in second word ; H2T */
1501 ASSERT(sizeof(unsigned_4
) == 4);
1502 PKE_MEM_WRITE(me
, vu_addr
+ 4,
1506 /* write tracking address in target byte-order */
1507 ASSERT(sizeof(unsigned_4
) == 4);
1508 PKE_MEM_WRITE(me
, vutrack_addr
,
1509 & fq
->source_address
,
1511 } /* VU xfer loop */
/* every word-pair must have been counted down by the loop */
1514 ASSERT(PKE_REG_MASK_GET(me
, NUM
, NUM
) == 0);
/* finished: mark the unit idle and advance 1 + num*2 words (presumably
   the PKEcode word plus the operand words) */
1517 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1518 pke_pc_advance(me
, 1 + num
*2);
1520 } /* if FIFO full enough */
1523 /* need to wait for another word */
1524 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1525 /* retry this instruction next clock */
/* PKEcode handler for DIRECT: forwards operand quadwords from the PKE
   instruction stream to the address GIF_PATH2_FIFO_ADDR.

   Steps visible in this excerpt:
   - IMM is extracted from PKECODE and counts 128-bit words; an IMM of 0 is
     treated as 0x10000.
   - A misaligned operand is handed to pke_code_error.
   - pke_pc_operand(me, imm*4) probes for the last operand word.  If it is
     missing, STAT.PPS is set to WAIT and the instruction is retried.
   - Otherwise STAT.PPS is set to XFER and the imm*4 operand words are
     visited one per loop pass; each word is stored into fifo_data and the
     collected quadword is written out with PKE_MEM_WRITE.
   - Finally STAT.PPS is set to IDLE and the PC is advanced by 1 + imm*4
     words.

   NOTE(review): braces, else lines, the declaration of i, the alignment
   test, the full-quadword test and the trailing PKE_MEM_WRITE arguments are
   not present in this excerpt; confirm against the complete file.  */
1531 pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
)
1533 /* check that FIFO has a few more words for DIRECT operand */
1534 unsigned_4
* last_direct_word
;
1535 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1537 /* assert 128-bit alignment of DIRECT operand */
/* NOTE(review): the condition guarding this return is missing from the
   excerpt */
1539 return pke_code_error(me
, pkecode
);
1541 /* map zero to max+1 */
1542 if(imm
==0) imm
=0x10000;
1544 last_direct_word
= pke_pc_operand(me
, imm
*4); /* imm: number of 128-bit words */
1545 if(last_direct_word
!= NULL
)
1549 unsigned_16 fifo_data
;
1551 /* "transferring" operand */
1552 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1554 /* transfer GPUIF quadwords, one word per iteration */
1555 for(i
=0; i
<imm
*4; i
++)
/* operand numbering starts at 1, hence 1+i */
1557 unsigned_4
* operand
= pke_pc_operand(me
, 1+i
);
1559 /* collect word into quadword */
/* the word index counts down from 3 to 0 as i counts up within each
   group of four words */
1560 *A4_16(&fifo_data
, 3 - (i
% 4)) = *operand
;
1562 /* write to GPUIF FIFO only with full quadword */
1565 ASSERT(sizeof(fifo_data
) == 16);
1566 PKE_MEM_WRITE(me
, GIF_PATH2_FIFO_ADDR
,
1569 } /* write collected quadword */
1571 } /* GPUIF xfer loop */
/* finished: mark the unit idle and advance 1 + imm*4 words (presumably
   the PKEcode word plus the operand words) */
1574 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1575 pke_pc_advance(me
, 1 + imm
*4);
1576 } /* if FIFO full enough */
1579 /* need to wait for another word */
1580 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1581 /* retry this instruction next clock */
/* PKEcode handler for DIRECTHL.  It has no behaviour of its own: the
   call is forwarded unchanged to pke_code_direct.  */
1587 pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
)
1589 /* treat the same as DIRECT */
1590 pke_code_direct(me
, pkecode
);
/* PKEcode handler for UNPACK: expands packed vectors from the PKE
   instruction stream into quadwords in the VU MEM1 window.

   Decoding: IMM, CMD and NUM come from PKECODE.  CMD is split into the shape
   controls vn (bits 2..3) and vl (bits 0..1) and the mask-enable bit m
   (bit 4).  CL and WL are read from the CYCLE register.  Bits 15 and 14 of
   IMM are the indicator flags r and usn; bits 0..9 of IMM are used as the
   destination quadword offset.

   Flow visible in this excerpt:
   - The operand length num_operands (in 32-bit words) is derived from n, vl
     and vn.  If operands are needed and the last one is not available,
     STAT.PPS is set to WAIT and the instruction is retried.
   - Otherwise STAT.PPS is set to XFER, NUM is loaded (0 becomes 0x100) and a
     loop runs until the NUM register is no longer greater than 0.
   - Each pass decrements NUM, computes the destination address (wrapping
     inside the window) and the tracking-table address, reads the old
     quadword, builds the unpacked quadword (zero fill, or bit fields fetched
     with pke_pc_operand_bits), optionally applies the mask register,
     applies the MODE.MDE accumulation mode, then writes the new quadword
     and the tracking entry.
   - Finally STAT.PPS is set to IDLE and the PC is advanced by
     1 + num_operands words.

   NOTE(review): many lines are absent from this excerpt (braces, the
   else/do/switch/break keywords, several conditions, a few declarations and
   call arguments), so the exact branch structure described here must be
   checked against the complete file.  */
1595 pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
)
1597 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1598 int cmd
= BIT_MASK_GET(pkecode
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
1599 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1600 short vn
= BIT_MASK_GET(cmd
, 2, 3); /* unpack shape controls */
1601 short vl
= BIT_MASK_GET(cmd
, 0, 1);
1602 int m
= BIT_MASK_GET(cmd
, 4, 4);
1603 short cl
= PKE_REG_MASK_GET(me
, CYCLE
, CL
); /* cycle controls */
1604 short wl
= PKE_REG_MASK_GET(me
, CYCLE
, WL
);
1605 int r
= BIT_MASK_GET(imm
, 15, 15); /* indicator bits in imm value */
1606 int usn
= BIT_MASK_GET(imm
, 14, 14);
1608 int n
, num_operands
;
1609 unsigned_4
* last_operand_word
= NULL
;
1611 /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
/* NOTE(review): this expression divides by wl; the lines that select
   between this formula and any alternative are missing from the excerpt,
   so it is not visible here how wl == 0 is avoided */
1615 n
= cl
* (num
/wl
) + PKE_LIMIT(num
% wl
, cl
);
/* (32 >> vl) bits per unit, vn+1 units per vector, n vectors */
1616 num_operands
= (31 + (32 >> vl
) * (vn
+1) * n
)/32; /* round up to next word */
1618 /* confirm that FIFO has enough words in it */
1619 if(num_operands
> 0)
1620 last_operand_word
= pke_pc_operand(me
, num_operands
);
/* proceed when the last operand word is present or no operand is needed */
1621 if(last_operand_word
!= NULL
|| num_operands
== 0)
1623 address_word vu_addr_base
, vutrack_addr_base
;
1624 address_word vu_addr_max_size
;
1625 int vector_num_out
, vector_num_in
;
1627 /* "transferring" operand */
1628 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1630 /* don't check whether VU is idle */
1632 /* compute VU address base */
/* PKE0 uses the VU0 MEM1 constants; the second group of assignments uses
   the VU1 constants (presumably the else branch, which is not visible) */
1633 if(me
->pke_number
== 0)
1635 vu_addr_base
= VU0_MEM1_WINDOW_START
;
1636 vu_addr_max_size
= VU0_MEM1_SIZE
;
1637 vutrack_addr_base
= VU0_MEM1_SRCADDR_START
;
1642 vu_addr_base
= VU1_MEM1_WINDOW_START
;
1643 vu_addr_max_size
= VU1_MEM1_SIZE
;
1644 vutrack_addr_base
= VU1_MEM1_SRCADDR_START
;
/* load the NUM register with the vector count; 0 is stored as 0x100 */
1648 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
== 0 ? 0x100 : num
);
1650 /* transfer given number of vectors */
1651 vector_num_out
= 0; /* output vector number being processed */
1652 vector_num_in
= 0; /* argument vector number being processed */
/* per-vector locals; the loop ends at the while test on NUM further down */
1655 quadword vu_old_data
;
1656 quadword vu_new_data
;
1657 quadword unpacked_data
;
1658 address_word vu_addr
;
1659 address_word vutrack_addr
;
1660 unsigned_4 source_addr
= 0;
/* count this vector down in the NUM register */
1665 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1666 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1668 /* compute VU destination address, as bytes in R5900 memory */
/* NOTE(review): two alternative vu_addr formulas follow; the condition
   choosing between them and the tail of the second formula are missing
   from this excerpt */
1671 /* map zero to max+1 */
1672 int addrwl
= (wl
== 0) ? 0x0100 : wl
;
1673 vu_addr
= vu_addr_base
+ 16 * (BIT_MASK_GET(imm
, 0, 9) +
1674 (vector_num_out
/ addrwl
) * cl
+
1675 (vector_num_out
% addrwl
));
1678 vu_addr
= vu_addr_base
+ 16 * (BIT_MASK_GET(imm
, 0, 9) +
1681 /* handle "R" double-buffering bit */
/* adds the TOPS register value, scaled by 16 bytes; presumably guarded by
   the r flag on a line that is not visible */
1683 vu_addr
+= 16 * PKE_REG_MASK_GET(me
, TOPS
, TOPS
);
1685 /* check for vu_addr overflow */
/* addresses past the end of the window wrap around to its start */
1686 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1687 vu_addr
-= vu_addr_max_size
;
1689 /* compute address of tracking table entry */
/* byte offset into the window divided by 4: presumably one 4-byte
   tracking entry per 16-byte quadword -- TODO confirm */
1690 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 4;
1692 /* read old VU data word at address; reverse words if needed */
1694 unsigned_16 vu_old_badwords
;
1695 ASSERT(sizeof(vu_old_badwords
) == 16);
1696 PKE_MEM_READ(me
, vu_addr
,
1697 &vu_old_badwords
, 16);
1698 vu_old_data
[0] = * A4_16(& vu_old_badwords
, 3);
1699 vu_old_data
[1] = * A4_16(& vu_old_badwords
, 2);
1700 vu_old_data
[2] = * A4_16(& vu_old_badwords
, 1);
1701 vu_old_data
[3] = * A4_16(& vu_old_badwords
, 0);
1704 /* For cyclic unpack, next operand quadword may come from instruction stream
1706 if((num
== 0 && cl
== 0 && wl
== 0) || /* shortcut clear */
1707 ((cl
< wl
) && ((vector_num_out
% wl
) >= cl
))) /* && short-circuit asserts wl != 0 */
1709 /* clear operand - used only in a "indeterminate" state */
1710 for(i
= 0; i
< 4; i
++)
1711 unpacked_data
[i
] = 0;
1715 /* compute packed vector dimensions */
1716 int vectorbits
, unitbits
;
1718 if(vl
< 3) /* PKE_UNPACK_*_{32,16,8} */
1720 unitbits
= (32 >> vl
);
1721 vectorbits
= unitbits
* (vn
+1);
/* NOTE(review): the assignments for the V4_5 case are missing from this
   excerpt */
1723 else if(vl
== 3 && vn
== 3) /* PKE_UNPACK_V4_5 */
1728 else /* illegal unpack variant */
1730 /* treat as illegal instruction */
1731 pke_code_error(me
, pkecode
);
1735 /* loop over columns */
1736 for(i
=0; i
<=vn
; i
++)
1740 /* offset in bits in current operand word */
/* NOTE(review): the declaration that receives this expression (the
   bitoffset used below) is missing from this excerpt */
1742 (vector_num_in
* vectorbits
) + (i
* unitbits
); /* # of bits from PKEcode */
1744 /* last unit of V4_5 is only one bit wide */
1745 if(vl
== 3 && vn
== 3 && i
== 3) /* PKE_UNPACK_V4_5 */
1748 /* fetch bitfield operand */
/* source_addr is passed by address to pke_pc_operand_bits; its prototype
   names that parameter sourceaddr */
1749 operand
= pke_pc_operand_bits(me
, bitoffset
, unitbits
, & source_addr
);
1751 /* selectively sign-extend; not for V4_5 1-bit value */
/* usn set or a 1-bit unit: store the raw field; the SEXT32 assignment is
   presumably the else branch */
1752 if(usn
|| unitbits
== 1)
1753 unpacked_data
[i
] = operand
;
1755 unpacked_data
[i
] = SEXT32(operand
, unitbits
-1);
1758 /* clear remaining top words in vector */
1760 unpacked_data
[i
] = 0;
1762 /* consumed a vector from the PKE instruction stream */
1764 } /* unpack word from instruction operand */
1766 /* compute replacement word */
1767 if(m
) /* use mask register? */
1769 /* compute index into mask register for this word */
/* position within the write cycle, limited to 3 by PKE_LIMIT */
1770 int addrwl
= (wl
== 0) ? 0x0100 : wl
;
1771 int mask_index
= PKE_LIMIT(vector_num_out
% addrwl
, 3);
1773 for(i
=0; i
<4; i
++) /* loop over columns */
/* mask_op names the source for column i; the switch line that dispatches
   on it is not visible in this excerpt */
1775 int mask_op
= PKE_MASKREG_GET(me
, mask_index
, i
);
1776 unsigned_4
* masked_value
= NULL
;
1777 unsigned_4 zero
= 0;
1781 case PKE_MASKREG_INPUT
:
1782 /* for vn == 0, all columns are copied from column 0 */
/* NOTE(review): the conditions selecting among the next three
   assignments are missing from this excerpt */
1784 masked_value
= & unpacked_data
[0];
1786 masked_value
= & zero
; /* arbitrary data: undefined in spec */
1788 masked_value
= & unpacked_data
[i
];
1791 case PKE_MASKREG_ROW
: /* exploit R0..R3 contiguity */
1792 masked_value
= & me
->regs
[PKE_REG_R0
+ i
][0];
1795 case PKE_MASKREG_COLUMN
: /* exploit C0..C3 contiguity */
/* indexed by mask_index rather than by the column number i */
1796 masked_value
= & me
->regs
[PKE_REG_C0
+ mask_index
][0];
1799 case PKE_MASKREG_NOTHING
:
1800 /* "write inhibit" by re-copying old data */
1801 masked_value
= & vu_old_data
[i
];
1806 /* no other cases possible */
1809 /* copy masked value for column */
1810 vu_new_data
[i
] = *masked_value
;
1811 } /* loop over columns */
1815 /* no mask - just copy over entire unpacked quadword */
1816 memcpy(vu_new_data
, unpacked_data
, sizeof(unpacked_data
));
1819 /* process STMOD register for accumulation operations */
1820 switch(PKE_REG_MASK_GET(me
, MODE
, MDE
))
1822 case PKE_MODE_ADDROW
: /* add row registers to output data */
/* NOTE(review): the loops over i around the statements in these two
   cases are not visible in this excerpt */
1824 /* exploit R0..R3 contiguity */
1825 vu_new_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
1828 case PKE_MODE_ACCROW
: /* add row registers to output data; accumulate */
1831 /* exploit R0..R3 contiguity */
/* the sum is also stored back into the row register */
1832 vu_new_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
1833 me
->regs
[PKE_REG_R0
+ i
][0] = vu_new_data
[i
];
1837 case PKE_MODE_INPUT
: /* pass data through */
1842 /* write new VU data word at address; reverse words if needed */
/* same word reversal as the read of the old data above */
1844 unsigned_16 vu_new_badwords
;
1845 * A4_16(& vu_new_badwords
, 3) = vu_new_data
[0];
1846 * A4_16(& vu_new_badwords
, 2) = vu_new_data
[1];
1847 * A4_16(& vu_new_badwords
, 1) = vu_new_data
[2];
1848 * A4_16(& vu_new_badwords
, 0) = vu_new_data
[3];
1849 ASSERT(sizeof(vu_new_badwords
) == 16);
1850 PKE_MEM_WRITE(me
, vu_addr
,
1851 &vu_new_badwords
, 16);
1854 /* write tracking address */
/* NOTE(review): the remaining arguments of this PKE_MEM_WRITE call are
   missing from this excerpt */
1855 ASSERT(sizeof(unsigned_4
) == 4);
1856 PKE_MEM_WRITE(me
, vutrack_addr
,
1860 /* next vector please */
1862 } /* vector transfer loop */
/* keep going while the NUM register is still above 0 */
1863 while(PKE_REG_MASK_GET(me
, NUM
, NUM
) > 0);
/* finished: mark the unit idle and advance 1 + num_operands words
   (presumably the PKEcode word plus the operand words) */
1866 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1867 pke_pc_advance(me
, 1 + num_operands
);
1868 } /* PKE FIFO full enough */
1871 /* need to wait for another word */
1872 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1873 /* retry this instruction next clock */
/* Handler for an illegal or misplaced PKEcode; the MPG, DIRECT and UNPACK
   handlers call it when they reject an instruction.
   When the ME1 field of the ERR register is clear it sets the ER1 flag in
   STAT and sets STAT.PPS to STALL.  The STAT.PPS = IDLE assignment is
   presumably the else branch (the else line is not visible in this
   excerpt -- confirm against the complete file).  In both cases the PC is
   then advanced by one word to step over the offending PKEcode.  */
1879 pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
)
/* ME1 clear: the error is reported and the unit stalls */
1881 if(! PKE_REG_MASK_GET(me
, ERR
, ME1
))
1883 /* set ER1 flag in STAT register */
1884 PKE_REG_MASK_SET(me
, STAT
, ER1
, 1);
1885 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
/* otherwise (presumably ME1 set): the unit simply goes idle */
1889 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1892 /* advance over faulty word */
1893 pke_pc_advance(me
, 1);