1 /* Copyright (C) 1998, Cygnus Solutions */
12 #include "sim-assert.h"
15 #include "sky-gpuif.h"
18 /* Imported functions */
20 void device_error (device
*me
, char* message
); /* device.c */
23 /* Internal function declarations */
25 static int pke_io_read_buffer(device
*, void*, int, address_word
,
26 unsigned, sim_cpu
*, sim_cia
);
27 static int pke_io_write_buffer(device
*, const void*, int, address_word
,
28 unsigned, sim_cpu
*, sim_cia
);
29 static void pke_issue(SIM_DESC
, struct pke_device
*);
30 static void pke_pc_advance(struct pke_device
*, int num_words
);
31 static unsigned_4
* pke_pc_operand(struct pke_device
*, int operand_num
);
32 static unsigned_4
pke_pc_operand_bits(struct pke_device
*, int bit_offset
,
33 int bit_width
, unsigned_4
* sourceaddr
);
34 static struct fifo_quadword
* pke_pc_fifo(struct pke_device
*, int operand_num
,
35 unsigned_4
** operand
);
36 static void pke_attach(SIM_DESC sd
, struct pke_device
* me
);
/* Selects which unit pke_check_stall() should examine.
   chk_vu    - tests the VBS0/VBS1 field of the COP2 status word.
   chk_path1 - GPUIF APATH field equal to 1 (VU -> GPUIF).
   chk_path2 - GPUIF APATH field equal to 2 (PKE -> GPUIF).
   chk_path3 - GPUIF APATH field equal to 3 (DMA -> GPUIF). */
37 enum pke_check_target
{ chk_vu
, chk_path1
, chk_path2
, chk_path3
};
38 static int pke_check_stall(struct pke_device
* me
, enum pke_check_target what
);
39 static void pke_flip_dbf(struct pke_device
* me
);
40 /* PKEcode handlers */
41 static void pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
);
42 static void pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
);
43 static void pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
);
44 static void pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
);
45 static void pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
);
46 static void pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
);
47 static void pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
);
48 static void pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
);
49 static void pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
);
50 static void pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
);
51 static void pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
);
52 static void pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
);
53 static void pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
);
54 static void pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
);
55 static void pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
);
56 static void pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
);
57 static void pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
);
58 static void pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
);
59 static void pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
);
60 static void pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
);
61 static void pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
);
62 static void pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
);
/* Device instance for PKE unit 0.  The first sub-initializer names the
   device "pke0" and installs the shared read/write buffer callbacks; the
   entries after it zero the FIFO write buffer and the FIFO bookkeeping.
   NOTE(review): the tail of this initializer is not visible in this
   excerpt. */
68 struct pke_device pke0_device
=
70 { "pke0", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
73 {}, 0, /* FIFO write buffer */
74 NULL
, 0, 0, NULL
, /* FIFO */
/* Device instance for PKE unit 1.  Same layout as the PKE0 instance: the
   device is named "pke1", uses the shared read/write buffer callbacks, and
   starts with a zeroed FIFO write buffer and empty FIFO bookkeeping.
   NOTE(review): the tail of this initializer is not visible in this
   excerpt. */
79 struct pke_device pke1_device
=
81 { "pke1", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
84 {}, 0, /* FIFO write buffer */
85 NULL
, 0, 0, NULL
, /* FIFO */
91 /* External functions */
94 /* Attach PKE addresses to main memory */
/* Public entry point: attach PKE unit 0 by delegating to pke_attach()
   with the pke0_device instance.
   sd: simulator descriptor, forwarded unchanged. */
97 pke0_attach(SIM_DESC sd
)
99 pke_attach(sd
, & pke0_device
);
/* Public entry point: attach PKE unit 1 by delegating to pke_attach()
   with the pke1_device instance.
   sd: simulator descriptor, forwarded unchanged. */
103 pke1_attach(SIM_DESC sd
)
105 pke_attach(sd
, & pke1_device
);
110 /* Issue a PKE instruction if possible */
/* Public entry point: give PKE unit 0 a chance to issue an instruction by
   delegating to pke_issue() with the pke0_device instance.
   sd: simulator descriptor, forwarded unchanged. */
113 pke0_issue(SIM_DESC sd
)
115 pke_issue(sd
, & pke0_device
);
/* Public entry point: give PKE unit 1 a chance to issue an instruction by
   delegating to pke_issue() with the pke1_device instance.
   sd: simulator descriptor, forwarded unchanged. */
119 pke1_issue(SIM_DESC sd
)
121 pke_issue(sd
, & pke1_device
);
126 /* Internal functions */
129 /* Attach PKE memory regions to simulator */
/* Attach one PKE unit's address ranges to the simulator core and, when
   requested through the environment, open its FIFO trace file.
   sd: simulator descriptor passed to every sim_core_attach() call.
   me: PKE unit being attached; me->pke_number selects the PKE0/VU0 or
       PKE1/VU1 constants for each range below.
   NOTE(review): the trailing arguments of each sim_core_attach() call are
   not visible in this excerpt. */
132 pke_attach(SIM_DESC sd
, struct pke_device
* me
)
/* register window: PKE_REGISTER_WINDOW_SIZE bytes at the unit's window start */
135 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
136 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
,
137 PKE_REGISTER_WINDOW_SIZE
/*nr_bytes*/,
/* FIFO port: a single quadword at the unit's FIFO address */
143 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
144 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
,
145 sizeof(quadword
) /*nr_bytes*/,
150 /* VU MEM0 tracking table */
/* the attached range is half the size of the matching VU MEM0 */
151 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
152 ((me
->pke_number
== 0) ? VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
),
153 ((me
->pke_number
== 0) ? VU0_MEM0_SIZE
: VU1_MEM0_SIZE
) / 2,
158 /* VU MEM1 tracking table */
/* the attached range is a quarter of the size of the matching VU MEM1 */
159 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
160 ((me
->pke_number
== 0) ? VU0_MEM1_SRCADDR_START
: VU1_MEM1_SRCADDR_START
),
161 ((me
->pke_number
== 0) ? VU0_MEM1_SIZE
: VU1_MEM1_SIZE
) / 4,
167 /* attach to trace file if appropriate */
/* the file name comes from environment variable VIF<n>_TRACE_FILE, where
   <n> is me->pke_number; tracing stays off when the variable is unset */
169 char trace_envvar
[80];
170 char* trace_filename
= NULL
;
171 sprintf(trace_envvar
, "VIF%d_TRACE_FILE", me
->pke_number
);
172 trace_filename
= getenv(trace_envvar
);
173 if(trace_filename
!= NULL
)
/* the trace file is opened for writing ("w") */
175 me
->fifo_trace_file
= fopen(trace_filename
, "w");
176 if(me
->fifo_trace_file
== NULL
)
177 perror("VIF FIFO trace error on fopen");
/* switch the trace stream to line buffering (_IOLBF) */
179 setvbuf(me
->fifo_trace_file
, NULL
, _IOLBF
, 0);
186 /* Handle a PKE read; return no. of bytes read */
/* Read callback shared by both PKE units.
   me_ is the generic device pointer; it is downcast to the embedding
   struct pke_device.  The address is classified against the unit's
   register window and FIFO address:
     - register window: the value is staged in the four-word `result'
       buffer (cleared first) and nr_bytes starting at byte offset
       reg_byte are copied to dest;
     - FIFO address: the FIFO is not readable, so dest is zero-filled.
   NOTE(review): the remaining parameters, the per-register selection
   logic and the return statements are not visible in this excerpt. */
189 pke_io_read_buffer(device
*me_
,
197 /* downcast to gather embedding pke_device struct */
198 struct pke_device
* me
= (struct pke_device
*) me_
;
200 /* find my address ranges */
201 address_word my_reg_start
=
202 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
203 address_word my_fifo_addr
=
204 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
206 /* enforce that an access does not span more than one quadword */
/* low/high are the quadword-truncated addresses of the first and last byte */
207 address_word low
= ADDR_TRUNC_QW(addr
);
208 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
212 /* classify address & handle */
213 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
/* the quadword-truncated window offset shifted right by 4 (divided by 16)
   is the register index used on me->regs below */
216 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
217 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
/* start from an all-zero staging buffer */
222 result
[0] = result
[1] = result
[2] = result
[3] = 0;
224 /* handle reads to individual registers; clear `readable' on error */
227 /* handle common case of register reading, side-effect free */
228 /* PKE1-only registers*/
234 if(me
->pke_number
== 0)
237 /* PKE0 & PKE1 common registers*/
/* only word 0 of the staging buffer is loaded from the register file */
256 result
[0] = me
->regs
[reg_num
][0];
259 /* handle common case of write-only registers */
265 ASSERT(0); /* test above should prevent this possibility */
268 /* perform transfer & return */
272 memcpy(dest
, ((unsigned_1
*) &result
) + reg_byte
, nr_bytes
);
277 /* return zero bits */
278 memset(dest
, 0, nr_bytes
);
284 else if(addr
>= my_fifo_addr
&&
285 addr
< my_fifo_addr
+ sizeof(quadword
))
289 /* FIFO is not readable: return a word of zeroes */
290 memset(dest
, 0, nr_bytes
);
299 /* Handle a PKE write; return no. of bytes written */
/* Write callback shared by both PKE units.
   me_ is the generic device pointer; it is downcast to the embedding
   struct pke_device.  The address is classified against the unit's
   register window and FIFO address:
     - register window: the written bytes are merged into a cleared
       four-word `input' buffer at byte offset reg_byte, then applied to
       the addressed register (FBRST control bits, ERR and MARK are the
       cases visible here);
     - FIFO address: bytes are collected in me->fifo_qw_in_progress until
       all bytes of a quadword have been written, then the quadword is
       appended to me->fifo, growing the array when it is full.
   NOTE(review): the remaining parameters, the register switch and the
   return statements are not visible in this excerpt. */
302 pke_io_write_buffer(device
*me_
,
310 /* downcast to gather embedding pke_device struct */
311 struct pke_device
* me
= (struct pke_device
*) me_
;
313 /* find my address ranges */
314 address_word my_reg_start
=
315 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
316 address_word my_fifo_addr
=
317 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
319 /* enforce that an access does not span more than one quadword */
/* low/high are the quadword-truncated addresses of the first and last byte */
320 address_word low
= ADDR_TRUNC_QW(addr
);
321 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
325 /* classify address & handle */
326 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
329 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
330 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
/* start from an all-zero staging buffer */
335 input
[0] = input
[1] = input
[2] = input
[3] = 0;
337 /* write user-given bytes into input */
338 memcpy(((unsigned_1
*) &input
) + reg_byte
, src
, nr_bytes
);
340 /* handle writes to individual registers; clear `writeable' on error */
344 /* Order these tests from least to most overriding, in case
345 multiple bits are set. */
/* STC: clear the PSS/PFS/PIS/INT/ER0/ER1 status bits and drop any pending
   PSS request */
346 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_STC_B
, PKE_REG_FBRST_STC_E
))
348 /* clear a bunch of status bits */
349 PKE_REG_MASK_SET(me
, STAT
, PSS
, 0);
350 PKE_REG_MASK_SET(me
, STAT
, PFS
, 0);
351 PKE_REG_MASK_SET(me
, STAT
, PIS
, 0);
352 PKE_REG_MASK_SET(me
, STAT
, INT
, 0);
353 PKE_REG_MASK_SET(me
, STAT
, ER0
, 0);
354 PKE_REG_MASK_SET(me
, STAT
, ER1
, 0);
355 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
356 /* will allow resumption of possible stalled instruction */
/* STP: only record the request here; pke_issue() turns the pending flag
   into STAT.PSS once the unit is idle */
358 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_STP_B
, PKE_REG_FBRST_STP_E
))
360 me
->flags
|= PKE_FLAG_PENDING_PSS
;
/* FBK: set the PFS status bit */
362 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_FBK_B
, PKE_REG_FBRST_FBK_E
))
364 PKE_REG_MASK_SET(me
, STAT
, PFS
, 1);
/* RST: reset handling follows */
366 if(BIT_MASK_GET(input
[0], PKE_REG_FBRST_RST_B
, PKE_REG_FBRST_RST_E
))
368 /* clear FIFO by skipping to word after PC: also
369 prevents re-execution attempt of possible stalled
371 me
->fifo_num_elements
= me
->fifo_pc
;
372 /* clear registers, flag, other state */
373 memset(me
->regs
, 0, sizeof(me
->regs
));
374 me
->fifo_qw_done
= 0;
381 /* copy bottom three bits */
382 BIT_MASK_SET(me
->regs
[PKE_REG_ERR
][0], 0, 2, BIT_MASK_GET(input
[0], 0, 2));
386 /* copy bottom sixteen bits */
387 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(input
[0], 0, 15));
388 /* reset MRK bit in STAT */
389 PKE_REG_MASK_SET(me
, STAT
, MRK
, 0);
392 /* handle common case of read-only registers */
393 /* PKE1-only registers - not really necessary to handle separately */
399 if(me
->pke_number
== 0)
402 /* PKE0 & PKE1 common registers*/
404 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
424 ASSERT(0); /* test above should prevent this possibility */
437 else if(addr
>= my_fifo_addr
&&
438 addr
< my_fifo_addr
+ sizeof(quadword
))
441 struct fifo_quadword
* fqw
;
442 int fifo_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside fifo quadword */
443 unsigned_4 dma_tag_present
= 0;
446 /* collect potentially-partial quadword in write buffer */
447 memcpy(((unsigned_1
*)& me
->fifo_qw_in_progress
) + fifo_byte
, src
, nr_bytes
);
448 /* mark bytes written */
/* one bit of fifo_qw_done is set per byte position written */
449 for(i
= fifo_byte
; i
< fifo_byte
+ nr_bytes
; i
++)
450 BIT_MASK_SET(me
->fifo_qw_done
, i
, i
, 1);
452 /* return if quadword not quite written yet */
453 if(BIT_MASK_GET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1) !=
454 BIT_MASK_BTW(0, sizeof(quadword
)-1))
457 /* all done - process quadword after clearing flag */
458 BIT_MASK_SET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1, 0);
460 /* ensure FIFO has enough elements */
461 if(me
->fifo_num_elements
== me
->fifo_buffer_size
)
/* the FIFO array is full: grow it by 20 quadwords */
464 int new_fifo_buffer_size
= me
->fifo_buffer_size
+ 20;
465 void* ptr
= realloc((void*) me
->fifo
, new_fifo_buffer_size
*sizeof(struct fifo_quadword
));
469 /* oops, cannot enlarge FIFO any more */
470 device_error(me_
, "Cannot enlarge FIFO buffer\n");
475 me
->fifo_buffer_size
= new_fifo_buffer_size
;
478 /* add new quadword at end of FIFO */
479 fqw
= & me
->fifo
[me
->fifo_num_elements
];
/* every word starts out unclassified */
480 fqw
->word_class
[0] = fqw
->word_class
[1] =
481 fqw
->word_class
[2] = fqw
->word_class
[3] = wc_unknown
;
482 memcpy((void*) fqw
->data
, me
->fifo_qw_in_progress
, sizeof(quadword
));
483 ASSERT(sizeof(unsigned_4
) == 4);
/* record where this quadword came from: the MADR register of the DMA
   channel matching this unit, converted to host byte order afterwards */
484 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_MADR
: DMA_D1_MADR
),
485 & fqw
->source_address
, /* target endian */
487 fqw
->source_address
= T2H_4(fqw
->source_address
);
/* read the channel's PKTFLAG register; presumably into dma_tag_present --
   the destination argument is not visible in this excerpt */
488 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? DMA_D0_PKTFLAG
: DMA_D1_PKTFLAG
),
494 /* lower two words are DMA tags */
495 fqw
->word_class
[0] = fqw
->word_class
[1] = wc_dma
;
498 me
->fifo_num_elements
++;
500 /* set FQC to "1" as FIFO is now not empty */
501 PKE_REG_MASK_SET(me
, STAT
, FQC
, 1);
513 /* Issue & swallow next PKE opcode if possible/available */
/* Try to fetch, decode and execute one PKE instruction for unit `me'.
   Visible stages:
     1. go/no-go: promote a pending PSS request to STAT.PSS when the unit
        is idle, then test the stall/halt bits (PFS, PSS, and ER0/ER1/PIS
        unless masked by ERR.ME0/ME1/MII) and check that the FIFO holds a
        quadword at the current PC;
     2. fetch: skip a DMA tag if present and load the word at
        fifo[fifo_pc].data[qw_pc] into the CODE register;
     3. decode: split the word into intr/cmd/num/imm and dispatch to the
        matching pke_code_*() handler.
   OFFSET, BASE, MSKPATH3, FLUSH, FLUSHA and PKEMSCALF are dispatched only
   when me->pke_number == 1.
   NOTE(review): `sd' is not referenced in the visible lines; the early
   returns and some declarations (fw, imm) are not visible either. */
516 pke_issue(SIM_DESC sd
, struct pke_device
* me
)
518 struct fifo_quadword
* fqw
;
520 unsigned_4 cmd
, intr
, num
;
523 /* 1 -- test go / no-go for PKE execution */
525 /* switch on STAT:PSS if PSS-pending and in idle state */
526 if((PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
) &&
527 (me
->flags
& PKE_FLAG_PENDING_PSS
) != 0)
529 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
530 PKE_REG_MASK_SET(me
, STAT
, PSS
, 1);
533 /* check for stall/halt control bits */
534 if(PKE_REG_MASK_GET(me
, STAT
, PFS
) ||
535 PKE_REG_MASK_GET(me
, STAT
, PSS
) || /* note special treatment below */
536 /* PEW bit not a reason to keep stalling - it's re-checked below */
537 /* PGW bit not a reason to keep stalling - it's re-checked below */
538 /* maskable stall controls: ER0, ER1, PIS */
539 (PKE_REG_MASK_GET(me
, STAT
, ER0
) && !PKE_REG_MASK_GET(me
, ERR
, ME0
)) ||
540 (PKE_REG_MASK_GET(me
, STAT
, ER1
) && !PKE_REG_MASK_GET(me
, ERR
, ME1
)) ||
541 (PKE_REG_MASK_GET(me
, STAT
, PIS
) && !PKE_REG_MASK_GET(me
, ERR
, MII
)))
543 /* try again next cycle; no state change */
547 /* confirm availability of new quadword of PKE instructions */
548 if(me
->fifo_num_elements
<= me
->fifo_pc
)
552 /* 2 -- fetch PKE instruction */
554 /* skip over DMA tag, if present */
/* advancing by zero words leaves the PC in place unless it sits on a
   DMA tag word */
555 pke_pc_advance(me
, 0);
557 /* "fetch" instruction quadword and word */
558 fqw
= & me
->fifo
[me
->fifo_pc
];
559 fw
= fqw
->data
[me
->qw_pc
];
561 /* store word in PKECODE register */
562 me
->regs
[PKE_REG_CODE
][0] = fw
;
565 /* 3 -- decode PKE instruction */
567 /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
568 so op-code is in top byte. */
569 intr
= BIT_MASK_GET(fw
, PKE_OPCODE_I_B
, PKE_OPCODE_I_E
);
570 cmd
= BIT_MASK_GET(fw
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
571 num
= BIT_MASK_GET(fw
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
572 imm
= BIT_MASK_GET(fw
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
576 /* set INT flag in STAT register */
577 PKE_REG_MASK_SET(me
, STAT
, INT
, 1);
578 /* XXX: send interrupt to 5900? */
/* an idle unit moves to the DECODE state before dispatch */
582 if(PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
)
583 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_DECODE
);
585 /* decode & execute */
/* each handler receives the whole instruction word, not just cmd */
586 if(IS_PKE_CMD(cmd
, PKENOP
))
587 pke_code_nop(me
, fw
);
588 else if(IS_PKE_CMD(cmd
, STCYCL
))
589 pke_code_stcycl(me
, fw
);
590 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, OFFSET
))
591 pke_code_offset(me
, fw
);
592 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, BASE
))
593 pke_code_base(me
, fw
);
594 else if(IS_PKE_CMD(cmd
, ITOP
))
595 pke_code_itop(me
, fw
);
596 else if(IS_PKE_CMD(cmd
, STMOD
))
597 pke_code_stmod(me
, fw
);
598 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, MSKPATH3
))
599 pke_code_mskpath3(me
, fw
);
600 else if(IS_PKE_CMD(cmd
, PKEMARK
))
601 pke_code_pkemark(me
, fw
);
602 else if(IS_PKE_CMD(cmd
, FLUSHE
))
603 pke_code_flushe(me
, fw
);
604 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSH
))
605 pke_code_flush(me
, fw
);
606 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSHA
))
607 pke_code_flusha(me
, fw
);
608 else if(IS_PKE_CMD(cmd
, PKEMSCAL
))
609 pke_code_pkemscal(me
, fw
);
610 else if(IS_PKE_CMD(cmd
, PKEMSCNT
))
611 pke_code_pkemscnt(me
, fw
);
612 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, PKEMSCALF
))
613 pke_code_pkemscalf(me
, fw
);
614 else if(IS_PKE_CMD(cmd
, STMASK
))
615 pke_code_stmask(me
, fw
);
616 else if(IS_PKE_CMD(cmd
, STROW
))
617 pke_code_strow(me
, fw
);
618 else if(IS_PKE_CMD(cmd
, STCOL
))
619 pke_code_stcol(me
, fw
);
620 else if(IS_PKE_CMD(cmd
, MPG
))
621 pke_code_mpg(me
, fw
);
622 else if(IS_PKE_CMD(cmd
, DIRECT
))
623 pke_code_direct(me
, fw
);
624 else if(IS_PKE_CMD(cmd
, DIRECTHL
))
625 pke_code_directhl(me
, fw
);
626 else if(IS_PKE_CMD(cmd
, UNPACK
))
627 pke_code_unpack(me
, fw
);
628 /* ... no other commands ... */
/* presumably the final else of the chain: anything unmatched is an error */
630 pke_code_error(me
, fw
);
635 /* advance the PC by given number of data words; update STAT/FQC
636 field; assume FIFO is filled enough; classify passed-over words;
637 write FIFO trace line */
/* Move the PKE program counter (fifo_pc / qw_pc) forward.
   me:        PKE unit whose PC is advanced.
   num_words: number of data words to step over; asserted non-negative.
              DMA tag words under the PC are skipped without counting.
   When a trace file is open, each fully consumed FIFO quadword is written
   to it as one line.  Afterwards STAT.FQC is cleared if the PC has reached
   the end of the FIFO; otherwise the word under the PC is classified as a
   PKEcode.
   NOTE(review): the loop header and the PC increments are not visible in
   this excerpt. */
640 pke_pc_advance(struct pke_device
* me
, int num_words
)
643 struct fifo_quadword
* fq
= NULL
;
644 ASSERT(num_words
>= 0);
646 /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */
650 fq
= & me
->fifo
[me
->fifo_pc
];
652 /* skip over DMA tag words if present in word 0 or 1 */
653 if(fq
->word_class
[me
->qw_pc
] == wc_dma
)
655 /* skip by going around loop an extra time */
659 /* nothing left to skip / no DMA tag here */
663 /* one word skipped */
666 /* point to next word */
673 /* trace the consumption of the FIFO quadword we just skipped over */
674 /* fq still points to it */
675 if(me
->fifo_trace_file
!= NULL
)
677 /* assert complete classification */
678 ASSERT(fq
->word_class
[3] != wc_unknown
);
679 ASSERT(fq
->word_class
[2] != wc_unknown
);
680 ASSERT(fq
->word_class
[1] != wc_unknown
);
681 ASSERT(fq
->word_class
[0] != wc_unknown
);
683 /* print trace record */
/* record layout: unit number, data words 3 down to 0 in hex, source
   address, then one class character per word (3 down to 0) */
684 fprintf(me
->fifo_trace_file
,
685 "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
686 (me
->pke_number
== 0 ? 0 : 1),
687 (unsigned) fq
->data
[3], (unsigned) fq
->data
[2],
688 (unsigned) fq
->data
[1], (unsigned) fq
->data
[0],
689 (unsigned) fq
->source_address
,
690 fq
->word_class
[3], fq
->word_class
[2],
691 fq
->word_class
[1], fq
->word_class
[0]);
694 /* XXX: zap old entries in FIFO */
695 } /* next quadword */
698 /* clear FQC if FIFO is now empty */
699 if(me
->fifo_num_elements
== me
->fifo_pc
)
701 PKE_REG_MASK_SET(me
, STAT
, FQC
, 0);
703 else /* annotate the word where the PC lands as a PKEcode */
705 fq
= & me
->fifo
[me
->fifo_pc
];
/* the landing word must be unclassified or already marked as a PKEcode */
706 ASSERT(fq
->word_class
[me
->qw_pc
] == wc_pkecode
||
707 fq
->word_class
[me
->qw_pc
] == wc_unknown
);
708 fq
->word_class
[me
->qw_pc
] = wc_pkecode
;
714 /* Return pointer to FIFO quadword containing given operand# in FIFO.
715 `operand_num' starts at 1. Return pointer to operand word in last
716 argument, if non-NULL. If FIFO is not full enough, return 0.
717 Signal an ER0 indication upon skipping a DMA tag. */
/* Locate the FIFO quadword that holds operand number `operand_num',
   counted from the current PC, without moving the PC itself.
   me:          PKE unit whose FIFO is searched.
   operand_num: operand index, copied into the local counter `num'.
   operand:     out-parameter that receives the address of the operand
                word inside the quadword.
   The walk uses private copies of fifo_pc / qw_pc.  A DMA tag word met on
   the way sets STAT.ER0 and is skipped.  The operand word is classified
   as PKE data before returning.
   NOTE(review): the loop header, the pointer increments, the NULL guard
   on `operand' and the return statements are not visible in this
   excerpt. */
719 struct fifo_quadword
*
720 pke_pc_fifo(struct pke_device
* me
, int operand_num
, unsigned_4
** operand
)
722 int num
= operand_num
;
723 int new_qw_pc
, new_fifo_pc
;
724 struct fifo_quadword
* fq
= NULL
;
728 /* snapshot current pointers */
729 new_fifo_pc
= me
->fifo_pc
;
730 new_qw_pc
= me
->qw_pc
;
732 /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */
736 /* one word skipped */
739 /* point to next word */
747 /* check for FIFO underflow */
748 if(me
->fifo_num_elements
== new_fifo_pc
)
754 /* skip over DMA tag words if present in word 0 or 1 */
755 fq
= & me
->fifo
[new_fifo_pc
];
756 if(fq
->word_class
[new_qw_pc
] == wc_dma
)
758 /* mismatch error! */
759 PKE_REG_MASK_SET(me
, STAT
, ER0
, 1);
760 /* skip by going around loop an extra time */
766 /* return pointer to operand word itself */
769 *operand
= & fq
->data
[new_qw_pc
];
771 /* annotate the word where the pseudo lands as a PKE operand */
/* the landing word must be unclassified or already marked as PKE data */
772 ASSERT(fq
->word_class
[new_qw_pc
] == wc_pkedata
||
773 fq
->word_class
[new_qw_pc
] == wc_unknown
);
774 fq
->word_class
[new_qw_pc
] = wc_pkedata
;
781 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
782 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
783 them as an error (ER0). */
/* Convenience wrapper around pke_pc_fifo() for callers that only need the
   operand word.
   me:          PKE unit whose FIFO is searched.
   operand_num: operand index, forwarded to pke_pc_fifo().
   `operand' starts out NULL and is filled in by pke_pc_fifo(); when that
   call returns NULL the pointer is asserted to be still NULL.
   NOTE(review): the return statement is not visible in this excerpt;
   callers in this file compare the result against NULL and dereference
   it. */
786 pke_pc_operand(struct pke_device
* me
, int operand_num
)
788 unsigned_4
* operand
= NULL
;
789 struct fifo_quadword
* fifo_operand
;
791 fifo_operand
= pke_pc_fifo(me
, operand_num
, & operand
);
793 if(fifo_operand
== NULL
)
794 ASSERT(operand
== NULL
); /* pke_pc_fifo() ought leave it untouched */
800 /* Return a bit-field extract of given operand# in FIFO, and its
801 source-addr. `bit_offset' starts at 0, referring to LSB after PKE
802 instruction word. Width must be >0, <=32. Assume FIFO is full
803 enough. Skip over DMA tags, but mark them as an error (ER0). */
/* Extract a bit-field from the operand stream and report the source
   address of the quadword it came from.
   me:          PKE unit whose FIFO is read.
   bit_offset:  bit position of the field; split below into a word index
                (bit_offset / 32) and a bit index in that word
                (bit_offset % 32).
   bit_width:   number of bits to extract.
   source_addr: out-parameter, set to the source_address of the FIFO
                quadword holding the word.
   The operand word must exist: the pointer is asserted non-NULL.
   NOTE(review): the visible code reads from a single 32-bit word; a field
   with bitnumber + bit_width > 32 does not appear to be handled -- confirm
   callers never request one.  The declaration of `value' and the return
   statement are not visible in this excerpt. */
806 pke_pc_operand_bits(struct pke_device
* me
, int bit_offset
, int bit_width
, unsigned_4
* source_addr
)
808 unsigned_4
* word
= NULL
;
810 struct fifo_quadword
* fifo_operand
;
811 int wordnumber
, bitnumber
;
813 wordnumber
= bit_offset
/32;
814 bitnumber
= bit_offset
%32;
816 /* find operand word with bitfield */
/* pke_pc_fifo() is called with wordnumber + 1 as the operand index */
817 fifo_operand
= pke_pc_fifo(me
, wordnumber
+ 1, &word
);
818 ASSERT(word
!= NULL
);
820 /* extract bitfield from word */
821 value
= BIT_MASK_GET(*word
, bitnumber
, bitnumber
+ bit_width
- 1);
823 /* extract source addr from fifo word */
824 *source_addr
= fifo_operand
->source_address
;
831 /* check for stall conditions on indicated devices (path* only on PKE1), do not change status
832 return 0 iff no stall */
/* Report whether the unit selected by `what' is currently busy.
   me:   PKE unit asking; me->pke_number picks VBS0 or VBS1 in the COP2
         status word for the VU check.
   what: which unit to test (see enum pke_check_target).
   Both the GIF status word and the COP2 status word are read first; the
   path checks then compare the GPUIF APATH field against 1, 2 or 3.
   NOTE(review): the destination arguments of the two reads, the
   `what == chk_vu' test, the bodies of the path checks and the return
   statement are not visible in this excerpt. */
834 pke_check_stall(struct pke_device
* me
, enum pke_check_target what
)
837 unsigned_4 cop2_stat
, gpuif_stat
;
839 /* read status words */
840 ASSERT(sizeof(unsigned_4
) == 4);
841 PKE_MEM_READ(me
, (GIF_REG_STAT
),
844 PKE_MEM_READ(me
, (COP2_REG_STAT_ADDR
),
/* VU check: the busy field tested depends on which PKE is asking */
851 if(me
->pke_number
== 0)
852 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS0_B
, COP2_REG_STAT_VBS0_E
);
853 else /* if(me->pke_number == 1) */
854 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS1_B
, COP2_REG_STAT_VBS1_E
);
856 else if(what
== chk_path1
) /* VU -> GPUIF */
858 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 1)
861 else if(what
== chk_path2
) /* PKE -> GPUIF */
863 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 2)
866 else if(what
== chk_path3
) /* DMA -> GPUIF */
868 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 3)
877 /* any stall reasons? */
882 /* flip the DBF bit; recompute TOPS, ITOP & TOP */
/* Swap the double-buffer flag of unit `me' and refresh the derived
   registers.  Order matters:
     1. ITOP and TOP are loaded from ITOPS and TOPS before the flip;
     2. DBF.DF is inverted and mirrored into STAT.DBF;
     3. TOPS is recomputed as BASE + DF * OFFSET, using the new DF. */
884 pke_flip_dbf(struct pke_device
* me
)
886 /* compute new ITOP and TOP */
887 PKE_REG_MASK_SET(me
, ITOP
, ITOP
,
888 PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
889 PKE_REG_MASK_SET(me
, TOP
, TOP
,
890 PKE_REG_MASK_GET(me
, TOPS
, TOPS
));
/* invert DF, then copy the new value into STAT.DBF */
892 PKE_REG_MASK_SET(me
, DBF
, DF
,
893 PKE_REG_MASK_GET(me
, DBF
, DF
) ? 0 : 1);
894 PKE_REG_MASK_SET(me
, STAT
, DBF
, PKE_REG_MASK_GET(me
, DBF
, DF
));
895 /* compute new TOPS */
896 PKE_REG_MASK_SET(me
, TOPS
, TOPS
,
897 (PKE_REG_MASK_GET(me
, BASE
, BASE
) +
898 (PKE_REG_MASK_GET(me
, DBF
, DF
) *
899 PKE_REG_MASK_GET(me
, OFST
, OFFSET
))));
904 /* PKEcode handler functions -- responsible for checking and
905 confirming old stall conditions, executing pkecode, updating PC and
906 status registers -- may assume being run on correct PKE unit */
/* PKENOP handler: consume the one-word instruction and return the unit to
   the idle state.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines. */
909 pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
)
912 pke_pc_advance(me
, 1);
913 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* STCYCL handler: load the CYCLE register from the immediate field, then
   consume the one-word instruction and go idle.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; immediate bits 8..15 go to CYCLE.WL and
            bits 0..7 to CYCLE.CL. */
918 pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
)
920 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
921 /* copy immediate value into CYCLE reg */
922 PKE_REG_MASK_SET(me
, CYCLE
, WL
, BIT_MASK_GET(imm
, 8, 15));
923 PKE_REG_MASK_SET(me
, CYCLE
, CL
, BIT_MASK_GET(imm
, 0, 7));
925 pke_pc_advance(me
, 1);
926 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* OFFSET handler: store immediate bits 0..9 in OFST.OFFSET, clear both
   double-buffer flags (DBF.DF and STAT.DBF), reset TOPS to BASE, then
   consume the one-word instruction and go idle.
   me:      PKE unit executing the instruction.
   pkecode: instruction word carrying the immediate. */
931 pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
)
933 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
934 /* copy 10 bits to OFFSET field */
935 PKE_REG_MASK_SET(me
, OFST
, OFFSET
, BIT_MASK_GET(imm
, 0, 9));
/* clear the DF flag in the DBF register */
937 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
938 /* clear other DBF bit */
939 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
940 /* set TOPS = BASE */
941 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
943 pke_pc_advance(me
, 1);
944 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* BASE handler: store immediate bits 0..9 in BASE.BASE, clear both
   double-buffer flags (DBF.DF and STAT.DBF), set TOPS to the new BASE,
   then consume the one-word instruction and go idle.
   me:      PKE unit executing the instruction.
   pkecode: instruction word carrying the immediate. */
949 pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
)
951 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
952 /* copy 10 bits to BASE field */
953 PKE_REG_MASK_SET(me
, BASE
, BASE
, BIT_MASK_GET(imm
, 0, 9));
/* clear the DF flag in the DBF register */
955 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
956 /* clear other DBF bit */
957 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
958 /* set TOPS = BASE */
959 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
961 pke_pc_advance(me
, 1);
962 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* ITOP handler: store immediate bits 0..9 in ITOPS.ITOPS, then consume
   the one-word instruction and go idle.  The ITOP register itself is not
   written here; pke_flip_dbf() copies ITOPS into ITOP.
   me:      PKE unit executing the instruction.
   pkecode: instruction word carrying the immediate. */
967 pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
)
969 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
970 /* copy 10 bits to ITOPS field */
971 PKE_REG_MASK_SET(me
, ITOPS
, ITOPS
, BIT_MASK_GET(imm
, 0, 9));
973 pke_pc_advance(me
, 1);
974 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* STMOD handler: load MODE.MDE from the low bits of the immediate, then
   consume the one-word instruction and go idle.
   me:      PKE unit executing the instruction.
   pkecode: instruction word carrying the immediate.
   NOTE(review): the comment below says 2 bits, but the extracted range is
   0..2; elsewhere in this file ranges are inclusive (0..9 for 10 bits,
   0..15 for 16 bits), which would make this 3 bits -- confirm whether
   the range should be 0..1 or whether PKE_REG_MASK_SET truncates to the
   field width. */
979 pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
)
981 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
982 /* copy 2 bits to MODE register */
983 PKE_REG_MASK_SET(me
, MODE
, MDE
, BIT_MASK_GET(imm
, 0, 2));
985 pke_pc_advance(me
, 1);
986 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* MSKPATH3 handler: translate the MSKPATH3 bit of the immediate into a
   GIF mode word and write 4 bytes of it to GIF_REG_MODE, then consume the
   one-word instruction and go idle.
   me:      PKE unit executing the instruction.
   pkecode: instruction word carrying the immediate.
   NOTE(review): the declaration of gif_mode and the value used when the
   bit is clear are not visible in this excerpt. */
991 pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
)
993 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
996 /* set appropriate bit */
997 if(BIT_MASK_GET(imm
, PKE_REG_MSKPATH3_B
, PKE_REG_MSKPATH3_E
) != 0)
998 gif_mode
= GIF_REG_MODE_M3R_MASK
;
1002 /* write register; patrickm code will look at M3R bit only */
1003 PKE_MEM_WRITE(me
, GIF_REG_MODE
, & gif_mode
, 4);
1006 pke_pc_advance(me
, 1);
1007 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* PKEMARK handler: store immediate bits 0..15 in MARK.MARK and set
   STAT.MRK, then consume the one-word instruction and go idle.  A write
   to the MARK register through pke_io_write_buffer() clears STAT.MRK
   again.
   me:      PKE unit executing the instruction.
   pkecode: instruction word carrying the immediate. */
1012 pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
)
1014 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1015 /* copy 16 bits to MARK register */
1016 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(imm
, 0, 15));
1017 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1018 PKE_REG_MASK_SET(me
, STAT
, MRK
, 1);
1020 pke_pc_advance(me
, 1);
1021 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
/* FLUSHE handler: wait for the VU to finish.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   Busy path: STAT.PEW is set, the unit enters the STALL state and the PC
   is left alone so the instruction is retried.
   Clear path: STAT.PEW is cleared, the unit goes idle and the one-word
   instruction is consumed.
   NOTE(review): the `else' separating the two paths is not visible in
   this excerpt. */
1026 pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
)
1028 /* compute next PEW bit */
1029 if(pke_check_stall(me
, chk_vu
))
1032 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1033 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1034 /* try again next cycle */
1039 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1040 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1041 pke_pc_advance(me
, 1);
/* FLUSH handler: wait for the VU and for GPUIF paths 1 and 2.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   STAT.PEW follows the VU check and STAT.PGW follows the path1/path2
   checks.  When something is busy the unit enters the WAIT state and the
   instruction is retried; otherwise the unit goes idle and the one-word
   instruction is consumed.
   NOTE(review): the updates of something_busy and the if/else lines that
   select between the branches are not visible in this excerpt. */
1047 pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
)
1049 int something_busy
= 0;
1051 /* compute next PEW, PGW bits */
1052 if(pke_check_stall(me
, chk_vu
))
1055 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1058 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1061 if(pke_check_stall(me
, chk_path1
) ||
1062 pke_check_stall(me
, chk_path2
))
1065 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1068 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1073 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1074 /* try again next cycle */
1079 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1080 pke_pc_advance(me
, 1);
/* FLUSHA handler: wait for the VU and for GPUIF paths 1, 2 and 3.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   Same shape as pke_code_flush(), with chk_path3 added to the PGW test.
   When something is busy the unit enters the WAIT state and the
   instruction is retried; otherwise the unit goes idle and the one-word
   instruction is consumed.
   NOTE(review): the updates of something_busy and the if/else lines that
   select between the branches are not visible in this excerpt. */
1086 pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
)
1088 int something_busy
= 0;
1090 /* compute next PEW, PGW bits */
1091 if(pke_check_stall(me
, chk_vu
))
1094 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1097 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1100 if(pke_check_stall(me
, chk_path1
) ||
1101 pke_check_stall(me
, chk_path2
) ||
1102 pke_check_stall(me
, chk_path3
))
1105 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1108 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1112 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1113 /* try again next cycle */
1118 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1119 pke_pc_advance(me
, 1);
/* PKEMSCAL handler: start the VU at the address given in the immediate.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; immediate bits 0..15 become the new VU PC.
   Busy path: STAT.PEW is set, the unit enters the STALL state and the
   instruction is retried.
   Clear path: STAT.PEW is cleared, the new PC is written to VU0_CIA or
   VU1_CIA depending on me->pke_number, the unit goes idle and the
   one-word instruction is consumed.
   NOTE(review): the statement guarded by the PKE1 test, the declaration
   of vu_pc and the trailing arguments of PKE_MEM_WRITE are not visible in
   this excerpt. */
1125 pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
)
1127 /* compute next PEW bit */
1128 if(pke_check_stall(me
, chk_vu
))
1131 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1132 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1133 /* try again next cycle */
1138 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1141 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1143 /* flip DBF on PKE1 */
1144 if(me
->pke_number
== 1)
1147 /* compute new PC for VU */
1148 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1150 /* write new PC; callback function gets VU running */
1151 ASSERT(sizeof(unsigned_4
) == 4);
1152 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1157 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1158 pke_pc_advance(me
, 1);
/* PKEMSCNT handler: restart the VU from its current PC.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   Busy path: STAT.PEW is set, the unit enters the STALL state and the
   instruction is retried.
   Clear path: STAT.PEW is cleared, the VU PC is read from VU0_CIA or
   VU1_CIA and written back to the same register, the unit goes idle and
   the one-word instruction is consumed.
   NOTE(review): the statement guarded by the PKE1 test and the trailing
   arguments of the memory accesses are not visible in this excerpt. */
1165 pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
)
1167 /* compute next PEW bit */
1168 if(pke_check_stall(me
, chk_vu
))
1171 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1172 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1173 /* try again next cycle */
1180 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1182 /* flip DBF on PKE1 */
1183 if(me
->pke_number
== 1)
/* fetch the VU's current PC */
1187 ASSERT(sizeof(unsigned_4
) == 4);
1188 PKE_MEM_READ(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1192 /* rewrite new PC; callback function gets VU running */
1193 ASSERT(sizeof(unsigned_4
) == 4);
1194 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1199 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1200 pke_pc_advance(me
, 1);
/* PKEMSCALF handler: like PKEMSCAL, but also waits for GPUIF paths 1, 2
   and 3 before starting the VU.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; immediate bits 0..15 become the new VU PC.
   STAT.PEW follows the VU check and STAT.PGW follows the path checks.
   When something is busy the unit enters the WAIT state and the
   instruction is retried; otherwise the new PC is written to VU0_CIA or
   VU1_CIA, the unit goes idle and the one-word instruction is consumed.
   NOTE(review): the updates of something_busy, the if/else lines that
   select between the branches, the statement guarded by the PKE1 test
   and the trailing arguments of PKE_MEM_WRITE are not visible in this
   excerpt. */
1206 pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
)
1208 int something_busy
= 0;
1210 /* compute next PEW, PGW bits */
1211 if(pke_check_stall(me
, chk_vu
))
1214 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1217 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1220 if(pke_check_stall(me
, chk_path1
) ||
1221 pke_check_stall(me
, chk_path2
) ||
1222 pke_check_stall(me
, chk_path3
))
1225 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1228 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1233 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1234 /* try again next cycle */
1239 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1241 /* flip DBF on PKE1 */
1242 if(me
->pke_number
== 1)
1245 /* compute new PC for VU */
1246 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1248 /* rewrite new PC; callback function gets VU running */
1249 ASSERT(sizeof(unsigned_4
) == 4);
1250 PKE_MEM_WRITE(me
, (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1255 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1256 pke_pc_advance(me
, 1);
/* STMASK handler: load the MASK register from the single operand word
   that follows the instruction.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   Operand present: the unit enters the XFER state, NUM is set to 1 for
   the transfer and back to 0 afterwards, MASK receives the operand, the
   unit goes idle and two words (instruction + operand) are consumed.
   Operand missing: the unit enters the WAIT state and the instruction is
   retried.
   NOTE(review): the declaration of `mask' and the NULL test selecting
   between the two paths are not visible in this excerpt. */
1262 pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
)
1264 /* check that FIFO has one more word for STMASK operand */
1267 mask
= pke_pc_operand(me
, 1);
1270 /* "transferring" operand */
1271 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1274 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1276 /* fill the register */
1277 PKE_REG_MASK_SET(me
, MASK
, MASK
, *mask
);
1280 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1283 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1284 pke_pc_advance(me
, 2);
1288 /* need to wait for another word */
1289 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1290 /* try again next cycle */
/* STROW handler: load the four ROW registers R0..R3 from the four operand
   words that follow the instruction.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   The fourth operand is looked up first as an availability probe.
   Operands present: the unit enters the XFER state, NUM is set to 1 for
   the transfer and back to 0 afterwards, operands 1..4 are copied into
   word 0 of R0..R3, the unit goes idle and five words (instruction + 4
   operands) are consumed.
   Operands missing: the unit enters the WAIT state and the instruction is
   retried.
   NOTE(review): the NULL test on last_op is not visible in this
   excerpt. */
1296 pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
)
1298 /* check that FIFO has four more words for STROW operand */
1299 unsigned_4
* last_op
;
1301 last_op
= pke_pc_operand(me
, 4);
1304 /* "transferring" operand */
1305 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1308 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1310 /* copy ROW registers: must all exist if 4th operand exists */
1311 me
->regs
[PKE_REG_R0
][0] = * pke_pc_operand(me
, 1);
1312 me
->regs
[PKE_REG_R1
][0] = * pke_pc_operand(me
, 2);
1313 me
->regs
[PKE_REG_R2
][0] = * pke_pc_operand(me
, 3);
1314 me
->regs
[PKE_REG_R3
][0] = * pke_pc_operand(me
, 4);
1317 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1320 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1321 pke_pc_advance(me
, 5);
1325 /* need to wait for another word */
1326 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1327 /* try again next cycle */
/* STCOL handler: load the four COL registers C0..C3 from the four operand
   words that follow the instruction.
   me:      PKE unit executing the instruction.
   pkecode: instruction word; not referenced in the visible lines.
   The fourth operand is looked up first as an availability probe.
   Operands present: the unit enters the XFER state, NUM is set to 1 for
   the transfer and back to 0 afterwards, operands 1..4 are copied into
   word 0 of C0..C3, the unit goes idle and five words (instruction + 4
   operands) are consumed.
   Operands missing: the unit enters the WAIT state and the instruction is
   retried.
   NOTE(review): the NULL test on last_op is not visible in this
   excerpt. */
1333 pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
)
1335 /* check that FIFO has four more words for STCOL operand */
1336 unsigned_4
* last_op
;
1338 last_op
= pke_pc_operand(me
, 4);
1341 /* "transferring" operand */
1342 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1345 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1347 /* copy COL registers: must all exist if 4th operand exists */
1348 me
->regs
[PKE_REG_C0
][0] = * pke_pc_operand(me
, 1);
1349 me
->regs
[PKE_REG_C1
][0] = * pke_pc_operand(me
, 2);
1350 me
->regs
[PKE_REG_C2
][0] = * pke_pc_operand(me
, 3);
1351 me
->regs
[PKE_REG_C3
][0] = * pke_pc_operand(me
, 4);
1354 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1357 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1358 pke_pc_advance(me
, 5);
1362 /* need to wait for another word */
1363 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1364 /* try again next cycle */
/* PKEcode handler for MPG: copy a run of 64-bit VU instructions from the
   PKE operand stream into VU instruction memory, and record each
   instruction's source address in the parallel tracking table.

   me      - PKE device; me->pke_number selects the VU0 or VU1 windows
   pkecode - instruction word; its NUM field gives the count of 64-bit
             instructions and its IMM field the destination offset in
             64-bit units

   Visible outcomes: pke_code_error() on a misaligned operand, PPS=STALL
   when pke_check_stall(me, chk_vu) reports a stall, PPS=WAIT when the
   last operand word is not yet available, otherwise the transfer is
   performed and the PC advances by 1 + num*2 words.

   NOTE(review): braces, else-arms, the declarations of i and next_num,
   and the trailing arguments of the PKE_MEM_WRITE calls are missing
   from this listing. */
1370 pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
)
1372 unsigned_4
* last_mpg_word
;
1373 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1374 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1376 /* assert 64-bit alignment of MPG operand */
/* only qw_pc values 1 and 3 are accepted; anything else is reported as
   an illegal instruction */
1377 if(me
->qw_pc
!= 3 && me
->qw_pc
!= 1)
1378 return pke_code_error(me
, pkecode
);
1380 /* map zero to max+1 */
1381 if(num
==0) num
=0x100;
1383 /* check that FIFO has a few more words for MPG operand */
1384 last_mpg_word
= pke_pc_operand(me
, num
*2); /* num: number of 64-bit words */
1385 if(last_mpg_word
!= NULL
)
1387 /* perform implied FLUSHE */
1388 if(pke_check_stall(me
, chk_vu
))
1391 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1392 /* retry this instruction next clock */
1399 /* "transferring" operand */
1400 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
/* NUM register counts down from num; it is decremented once per
   iteration below and asserted to be 0 after the loop */
1403 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
);
1405 /* transfer VU instructions, one word-pair per iteration */
1406 for(i
=0; i
<num
; i
++)
1408 address_word vu_addr_base
, vu_addr
;
1409 address_word vutrack_addr_base
, vutrack_addr
;
1410 address_word vu_addr_max_size
;
1411 unsigned_4 vu_lower_opcode
, vu_upper_opcode
;
1412 unsigned_4
* operand
;
1413 unsigned_4 source_addr
;
1414 struct fifo_quadword
* fq
;
/* decrement the NUM register for this word-pair */
1418 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1419 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1421 /* imm: in 64-bit units for MPG instruction */
1422 /* VU*_MEM0 : instruction memory */
1423 vu_addr_base
= (me
->pke_number
== 0) ?
1424 VU0_MEM0_WINDOW_START
: VU1_MEM0_WINDOW_START
;
1425 vu_addr_max_size
= (me
->pke_number
== 0) ?
1426 VU0_MEM0_SIZE
: VU1_MEM0_SIZE
;
1427 vutrack_addr_base
= (me
->pke_number
== 0) ?
1428 VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
;
1430 /* compute VU address for this word-pair */
1431 vu_addr
= vu_addr_base
+ (imm
+ i
) * 8;
1432 /* check for vu_addr overflow */
/* wrap the address back into the window by repeatedly subtracting the
   window size */
1433 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1434 vu_addr
-= vu_addr_max_size
;
1436 /* compute VU tracking address */
/* the byte offset is halved: each 8-byte instruction pair maps to one
   4-byte tracking entry (see the sizeof(unsigned_4) write below) */
1437 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 2;
1439 /* Fetch operand words; assume they are already little-endian for VU imem */
/* pke_pc_fifo also yields the FIFO quadword holding the operand, whose
   source_address is recorded further down */
1440 fq
= pke_pc_fifo(me
, i
*2 + 1, & operand
);
1441 vu_lower_opcode
= *operand
;
1442 vu_upper_opcode
= *pke_pc_operand(me
, i
*2 + 2);
1444 /* write data into VU memory */
1445 /* lower (scalar) opcode comes in first word */
1446 PKE_MEM_WRITE(me
, vu_addr
,
1449 /* upper (vector) opcode comes in second word */
1450 ASSERT(sizeof(unsigned_4
) == 4);
1451 PKE_MEM_WRITE(me
, vu_addr
+ 4,
1455 /* write tracking address in target byte-order */
1456 source_addr
= H2T_4(fq
->source_address
);
1457 ASSERT(sizeof(unsigned_4
) == 4);
1458 PKE_MEM_WRITE(me
, vutrack_addr
,
1461 } /* VU xfer loop */
/* every word-pair must have been accounted for */
1464 ASSERT(PKE_REG_MASK_GET(me
, NUM
, NUM
) == 0);
/* finished: go idle and step over the PKEcode plus num word-pairs */
1467 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1468 pke_pc_advance(me
, 1 + num
*2);
1470 } /* if FIFO full enough */
1473 /* need to wait for another word */
1474 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1475 /* retry this instruction next clock */
/* PKEcode handler for DIRECT: forward imm quadwords from the PKE operand
   stream to the GPUIF PATH2 FIFO address.

   me      - PKE device
   pkecode - instruction word; its IMM field is the number of 128-bit
             quadwords that follow (0 is mapped to 0x10000)

   Visible outcomes: pke_code_error() when the alignment check fails,
   PPS=WAIT when the last operand word is not yet available, otherwise
   the words are transferred and the PC advances by 1 + imm*4 words.

   NOTE(review): the alignment condition, the "quadword complete" test,
   the declarations of i and fifo_data, and the trailing PKE_MEM_WRITE
   arguments are missing from this listing. */
1481 pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
)
1483 /* check that FIFO has a few more words for DIRECT operand */
1484 unsigned_4
* last_direct_word
;
1485 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1487 /* assert 128-bit alignment of DIRECT operand */
1489 return pke_code_error(me
, pkecode
);
1491 /* map zero to max+1 */
1492 if(imm
==0) imm
=0x10000;
1494 last_direct_word
= pke_pc_operand(me
, imm
*4); /* imm: number of 128-bit words */
1495 if(last_direct_word
!= NULL
)
1501 /* "transferring" operand */
1502 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1504 /* transfer GPUIF quadwords, one word per iteration */
1505 for(i
=0; i
<imm
*4; i
++)
/* operand words are numbered from 1, hence 1+i */
1507 unsigned_4
* operand
= pke_pc_operand(me
, 1+i
);
1509 /* collect word into quadword */
1510 fifo_data
[i
% 4] = *operand
;
1512 /* write to GPUIF FIFO only with full quadword */
1515 ASSERT(sizeof(fifo_data
) == 16);
1516 PKE_MEM_WRITE(me
, GIF_PATH2_FIFO_ADDR
,
1519 } /* write collected quadword */
1521 } /* GPUIF xfer loop */
/* finished: go idle and step over the PKEcode plus imm quadwords */
1524 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1525 pke_pc_advance(me
, 1 + imm
*4);
1526 } /* if FIFO full enough */
1529 /* need to wait for another word */
1530 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1531 /* retry this instruction next clock */
/* PKEcode handler for DIRECTHL: no separate behaviour is modelled; the
   instruction is handed unchanged to the DIRECT handler.

   me      - PKE device
   pkecode - instruction word, passed through as-is */
1537 pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
)
1539 /* treat the same as DIRECT: delegate to pke_code_direct */
1540 pke_code_direct(me
, pkecode
);
/* PKEcode handler for UNPACK: expand packed vectors from the PKE operand
   stream into 128-bit quadwords and write them to VU data memory,
   optionally applying the mask register and the row-add/accumulate mode,
   and record the source address of each vector in the tracking table.

   me      - PKE device; me->pke_number selects the VU0 or VU1 windows
   pkecode - instruction word; fields used are IMM (destination address
             in bits 0..9, usn in bit 14, r in bit 15), CMD (vl in bits
             0..1, vn in bits 2..3, m in bit 4) and NUM (vector count)

   Visible outcomes: PPS=WAIT when the last operand word is not yet
   available; pke_code_error() for an unsupported vn/vl combination;
   otherwise vectors are written until the NUM register reaches 0 and
   the PC advances by 1 + num_operands words.

   NOTE(review): many original lines (braces, do/else keywords, several
   conditions, and the declarations of i, next_num, operand and
   bitoffset) are missing from this listing; comments below describe
   only what the surviving lines show. */
1545 pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
)
1547 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1548 int cmd
= BIT_MASK_GET(pkecode
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
1549 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1550 short vn
= BIT_MASK_GET(cmd
, 2, 3); /* unpack shape controls */
1551 short vl
= BIT_MASK_GET(cmd
, 0, 1);
1552 int m
= BIT_MASK_GET(cmd
, 4, 4);
1553 short cl
= PKE_REG_MASK_GET(me
, CYCLE
, CL
); /* cycle controls */
1554 short wl
= PKE_REG_MASK_GET(me
, CYCLE
, WL
);
1555 int r
= BIT_MASK_GET(imm
, 15, 15); /* indicator bits in imm value */
1556 int usn
= BIT_MASK_GET(imm
, 14, 14);
1558 int n
, num_operands
;
1559 unsigned_4
* last_operand_word
= NULL
;
1561 /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
/* NOTE(review): the guard that precedes this assignment is missing from
   the listing; as shown, the expression divides by wl, so it must not be
   reached with wl == 0 -- confirm against the full file */
1565 n
= cl
* (num
/wl
) + PKE_LIMIT(num
% wl
, cl
);
/* total operand bits divided by 32 gives the count of 32-bit operand words.
   NOTE(review): the integer division truncates, so a bit count that is
   not a multiple of 32 yields one word fewer than the data occupies --
   confirm whether rounding up is intended */
1566 num_operands
= ((32 >> vl
) * (vn
+1) * n
)/32;
1568 /* confirm that FIFO has enough words in it */
1569 if(num_operands
> 0)
1570 last_operand_word
= pke_pc_operand(me
, num_operands
);
/* proceed when the last operand is present, or when no operand words
   are needed at all */
1571 if(last_operand_word
!= NULL
|| num_operands
== 0)
1573 address_word vu_addr_base
, vutrack_addr_base
;
1574 address_word vu_addr_max_size
;
1575 int vector_num_out
, vector_num_in
;
1577 /* "transferring" operand */
1578 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1580 /* don't check whether VU is idle */
1582 /* compute VU address base */
/* imm bits 0..9 give the destination in 16-byte units; the tracking
   table uses 4 bytes per 16-byte quadword */
1583 if(me
->pke_number
== 0)
1585 vu_addr_base
= VU0_MEM1_WINDOW_START
+ 16 * BIT_MASK_GET(imm
, 0, 9);
1586 vu_addr_max_size
= VU0_MEM1_SIZE
;
1587 vutrack_addr_base
= VU0_MEM1_SRCADDR_START
+ 4 * BIT_MASK_GET(imm
, 0, 9);
1591 vu_addr_base
= VU1_MEM1_WINDOW_START
+ 16 * BIT_MASK_GET(imm
, 0, 9);
1592 vu_addr_max_size
= VU1_MEM1_SIZE
;
1593 vutrack_addr_base
= VU1_MEM1_SRCADDR_START
+ 4 * BIT_MASK_GET(imm
, 0, 9);
/* with r set, both bases are additionally offset by the TOPS register */
1594 if(r
) /* double-buffering */
1596 vu_addr_base
+= 16 * PKE_REG_MASK_GET(me
, TOPS
, TOPS
);
1597 vutrack_addr_base
+= 4 * PKE_REG_MASK_GET(me
, TOPS
, TOPS
);
/* NUM register counts the vectors still to be written; a NUM field of
   0 stands for 0x100 */
1603 PKE_REG_MASK_SET(me
, NUM
, NUM
, num
== 0 ? 0x100 : num
);
1605 /* transfer given number of vectors */
1606 vector_num_out
= 0; /* output vector number being processed */
1607 vector_num_in
= 0; /* argument vector number being processed */
1610 quadword vu_old_data
;
1611 quadword vu_new_data
;
1612 quadword unpacked_data
;
1613 address_word vu_addr
;
1614 address_word vutrack_addr
;
1615 unsigned_4 source_addr
= 0;
/* decrement the NUM register for this output vector */
1620 next_num
= PKE_REG_MASK_GET(me
, NUM
, NUM
) - 1;
1621 PKE_REG_MASK_SET(me
, NUM
, NUM
, next_num
);
1623 /* compute VU destination address, as bytes in R5900 memory */
/* two alternative address formulas follow; the condition choosing
   between them is missing from this listing */
1626 /* map zero to max+1 */
1627 int addrwl
= (wl
== 0) ? 0x0100 : wl
;
1628 vu_addr
= vu_addr_base
+ 16*(cl
*(vector_num_out
/addrwl
) + (vector_num_out
%addrwl
));
1631 vu_addr
= vu_addr_base
+ 16*vector_num_out
;
1633 /* check for vu_addr overflow */
1634 while(vu_addr
>= vu_addr_base
+ vu_addr_max_size
)
1635 vu_addr
-= vu_addr_max_size
;
1637 /* compute address of tracking table entry */
1638 vutrack_addr
= vutrack_addr_base
+ ((signed_8
)vu_addr
- (signed_8
)vu_addr_base
) / 4;
1640 /* read old VU data word at address */
/* the old contents are needed for the "write inhibit" mask case below */
1641 ASSERT(sizeof(vu_old_data
) == 16);
1642 PKE_MEM_READ(me
, vu_addr
,
1646 /* yank memory out of little-endian order */
1648 vu_old_data
[i
] = LE2H_4(vu_old_data
[i
]);
/* NOTE(review): the comment that starts on the next line has lost its
   terminating line in this listing, so a C lexer would swallow the text
   that follows it up to the next comment terminator */
1650 /* For cyclic unpack, next operand quadword may come from instruction stream
1652 if((num
== 0 && cl
== 0 && wl
== 0) || /* shortcut clear */
1653 ((cl
< wl
) && ((vector_num_out
% wl
) >= cl
))) /* wl != 0, set above */
1655 /* clear operand - used only in a "indeterminate" state */
1656 for(i
= 0; i
< 4; i
++)
1657 unpacked_data
[i
] = 0;
1661 /* compute packed vector dimensions */
1662 int vectorbits
, unitbits
;
/* vl selects the unit width as 32 >> vl bits; a vector has vn+1 units */
1664 if(vl
< 3) /* PKE_UNPACK_*_{32,16,8} */
1666 unitbits
= (32 >> vl
);
1667 vectorbits
= unitbits
* (vn
+1);
1669 else if(vl
== 3 && vn
== 3) /* PKE_UNPACK_V4_5 */
1674 else /* illegal unpack variant */
1676 /* treat as illegal instruction */
1677 pke_code_error(me
, pkecode
);
1681 /* loop over columns */
1682 for(i
=0; i
<=vn
; i
++)
1686 /* offset in bits in current operand word */
1688 (vector_num_in
* vectorbits
) + (i
* unitbits
); /* # of bits from PKEcode */
1690 /* last unit of V4_5 is only one bit wide */
1691 if(vl
== 3 && vn
== 3 && i
== 3) /* PKE_UNPACK_V4_5 */
1694 /* fetch bitfield operand */
/* source_addr is filled in by the callee and later written to the
   tracking table */
1695 operand
= pke_pc_operand_bits(me
, bitoffset
, unitbits
, & source_addr
);
1697 /* selectively sign-extend; not for V4_5 1-bit value */
/* usn set means the value is stored as-is; otherwise it is
   sign-extended from bit unitbits-1 */
1698 if(usn
|| unitbits
== 1)
1699 unpacked_data
[i
] = operand
;
1701 unpacked_data
[i
] = SEXT32(operand
, unitbits
-1);
1704 /* consumed a vector from the PKE instruction stream */
1706 } /* unpack word from instruction operand */
1708 /* compute replacement word */
1709 if(m
) /* use mask register? */
1711 /* compute index into mask register for this word */
/* the row index within a write cycle is capped at 3 via PKE_LIMIT */
1712 int addrwl
= (wl
== 0) ? 0x0100 : wl
;
1713 int mask_index
= PKE_LIMIT(vector_num_out
% addrwl
, 3);
1715 for(i
=0; i
<4; i
++) /* loop over columns */
1717 int mask_op
= PKE_MASKREG_GET(me
, mask_index
, i
);
1718 unsigned_4
* masked_value
= NULL
;
1719 unsigned_4 zero
= 0;
/* mask_op picks the source of this column: unpacked input, a row
   register, a column register, or the old memory contents */
1723 case PKE_MASKREG_INPUT
:
1724 /* for vn == 0, all columns are copied from column 0 */
1726 masked_value
= & unpacked_data
[0];
1728 masked_value
= & zero
; /* arbitrary data: undefined in spec */
1730 masked_value
= & unpacked_data
[i
];
1733 case PKE_MASKREG_ROW
: /* exploit R0..R3 contiguity */
1734 masked_value
= & me
->regs
[PKE_REG_R0
+ i
][0];
1737 case PKE_MASKREG_COLUMN
: /* exploit C0..C3 contiguity */
1738 masked_value
= & me
->regs
[PKE_REG_C0
+ mask_index
][0];
1741 case PKE_MASKREG_NOTHING
:
1742 /* "write inhibit" by re-copying old data */
1743 masked_value
= & vu_old_data
[i
];
1748 /* no other cases possible */
1751 /* copy masked value for column */
1752 vu_new_data
[i
] = *masked_value
;
1753 } /* loop over columns */
1757 /* no mask - just copy over entire unpacked quadword */
1758 memcpy(vu_new_data
, unpacked_data
, sizeof(unpacked_data
));
1761 /* process STMOD register for accumulation operations */
1762 switch(PKE_REG_MASK_GET(me
, MODE
, MDE
))
1764 case PKE_MODE_ADDROW
: /* add row registers to output data */
1766 /* exploit R0..R3 contiguity */
1767 vu_new_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
1770 case PKE_MODE_ACCROW
: /* add row registers to output data; accumulate */
1773 /* exploit R0..R3 contiguity */
/* unlike ADDROW, the sum is also stored back into the row register */
1774 vu_new_data
[i
] += me
->regs
[PKE_REG_R0
+ i
][0];
1775 me
->regs
[PKE_REG_R0
+ i
][0] = vu_new_data
[i
];
1779 case PKE_MODE_INPUT
: /* pass data through */
1784 /* yank memory into little-endian order */
1786 vu_new_data
[i
] = H2LE_4(vu_new_data
[i
]);
1788 /* write replacement word */
1789 ASSERT(sizeof(vu_new_data
) == 16);
1790 PKE_MEM_WRITE(me
, vu_addr
,
1794 /* write tracking address in target byte-order */
1795 source_addr
= H2T_4(source_addr
);
1796 ASSERT(sizeof(unsigned_4
) == 4);
1797 PKE_MEM_WRITE(me
, vutrack_addr
,
1801 /* next vector please */
1803 } /* vector transfer loop */
/* repeat until the NUM register has counted down to 0 */
1804 while(PKE_REG_MASK_GET(me
, NUM
, NUM
) > 0);
/* finished: go idle and step over the PKEcode plus its operand words */
1807 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1808 pke_pc_advance(me
, 1 + num_operands
);
1809 } /* PKE FIFO full enough */
1812 /* need to wait for another word */
1813 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1814 /* retry this instruction next clock */
/* Handler for an illegal or malformed PKEcode: flag the error in the
   STAT register and skip the offending word.

   me      - PKE device whose STAT register and PC are updated
   pkecode - the faulty instruction word (not read by the visible
             statements) */
1820 pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
)
1822 /* set ER1 flag in STAT register */
1823 PKE_REG_MASK_SET(me
, STAT
, ER1
, 1);
1824 /* advance over faulty word */
/* the unit is left idle so the word after the faulty one can be issued */
1825 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1826 pke_pc_advance(me
, 1);