1 /* Copyright (C) 1998, Cygnus Solutions */
12 #include "sim-assert.h"
15 #include "sky-gpuif.h"
18 /* Imported functions */
20 void device_error (device
*me
, char* message
); /* device.c */
23 /* Internal function declarations */
25 static int pke_io_read_buffer(device
*, void*, int, address_word
,
26 unsigned, sim_cpu
*, sim_cia
);
27 static int pke_io_write_buffer(device
*, const void*, int, address_word
,
28 unsigned, sim_cpu
*, sim_cia
);
29 static void pke_issue(SIM_DESC
, struct pke_device
*);
30 static void pke_pc_advance(struct pke_device
*, int num_words
);
31 static unsigned_4
* pke_pc_operand(struct pke_device
*, int operand_num
);
32 static unsigned_4
pke_pc_operand_bits(struct pke_device
*, int bit_offset
,
33 int bit_width
, unsigned_4
* sourceaddr
);
34 static struct fifo_quadword
* pke_pc_fifo(struct pke_device
*, int operand_num
,
35 unsigned_4
** operand
);
36 static int pke_track_write(struct pke_device
*, const void* src
, int len
,
37 address_word dest
, unsigned_4 sourceaddr
);
38 static void pke_attach(SIM_DESC sd
, struct pke_device
* me
);
39 enum pke_check_target
{ chk_vu
, chk_path1
, chk_path2
, chk_path3
};
40 static int pke_check_stall(struct pke_device
* me
, enum pke_check_target what
);
41 static void pke_flip_dbf(struct pke_device
* me
);
42 /* PKEcode handlers */
43 static void pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
);
44 static void pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
);
45 static void pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
);
46 static void pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
);
47 static void pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
);
48 static void pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
);
49 static void pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
);
50 static void pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
);
51 static void pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
);
52 static void pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
);
53 static void pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
);
54 static void pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
);
55 static void pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
);
56 static void pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
);
57 static void pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
);
58 static void pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
);
59 static void pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
);
60 static void pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
);
61 static void pke_code_direct(struct pke_device
* me
, unsigned_4 pkecode
);
62 static void pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
);
63 static void pke_code_unpack(struct pke_device
* me
, unsigned_4 pkecode
);
64 static void pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
);
70 struct pke_device pke0_device
=
72 { "pke0", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
75 {}, 0, /* FIFO write buffer */
76 NULL
, 0, 0, NULL
, /* FIFO */
81 struct pke_device pke1_device
=
83 { "pke1", &pke_io_read_buffer
, &pke_io_write_buffer
}, /* device */
86 {}, 0, /* FIFO write buffer */
87 NULL
, 0, 0, NULL
, /* FIFO */
93 /* External functions */
96 /* Attach PKE addresses to main memory */
99 pke0_attach(SIM_DESC sd
)
101 pke_attach(sd
, & pke0_device
);
105 pke1_attach(SIM_DESC sd
)
107 pke_attach(sd
, & pke1_device
);
112 /* Issue a PKE instruction if possible */
115 pke0_issue(SIM_DESC sd
)
117 pke_issue(sd
, & pke0_device
);
121 pke1_issue(SIM_DESC sd
)
123 pke_issue(sd
, & pke0_device
);
128 /* Internal functions */
131 /* Attach PKE memory regions to simulator */
134 pke_attach(SIM_DESC sd
, struct pke_device
* me
)
137 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
138 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
,
139 PKE_REGISTER_WINDOW_SIZE
/*nr_bytes*/,
145 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
146 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
,
147 sizeof(quadword
) /*nr_bytes*/,
152 /* source-addr tracking word */
153 sim_core_attach (sd
, NULL
, 0, access_read_write
, 0,
154 (me
->pke_number
== 0) ? PKE0_SRCADDR
: PKE1_SRCADDR
,
155 sizeof(unsigned_4
) /*nr_bytes*/,
158 zalloc(sizeof(unsigned_4
)) /*buffer*/);
160 /* attach to trace file if appropriate */
162 char trace_envvar
[80];
163 char* trace_filename
= NULL
;
164 sprintf(trace_envvar
, "VIF%d_TRACE_FILE", me
->pke_number
);
165 trace_filename
= getenv(trace_envvar
);
166 if(trace_filename
!= NULL
)
168 me
->fifo_trace_file
= fopen(trace_filename
, "w");
169 if(me
->fifo_trace_file
== NULL
)
171 perror("VIF FIFO trace error on fopen");
179 /* Handle a PKE read; return no. of bytes read */
182 pke_io_read_buffer(device
*me_
,
190 /* downcast to gather embedding pke_device struct */
191 struct pke_device
* me
= (struct pke_device
*) me_
;
193 /* find my address ranges */
194 address_word my_reg_start
=
195 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
196 address_word my_fifo_addr
=
197 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
199 /* enforce that an access does not span more than one quadword */
200 address_word low
= ADDR_TRUNC_QW(addr
);
201 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
205 /* classify address & handle */
206 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
209 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
210 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
215 result
[0] = result
[1] = result
[2] = result
[3] = 0;
217 /* handle reads to individual registers; clear `readable' on error */
220 /* handle common case of register reading, side-effect free */
221 /* PKE1-only registers*/
227 if(me
->pke_number
== 0)
230 /* PKE0 & PKE1 common registers*/
249 result
[0] = me
->regs
[reg_num
][0];
252 /* handle common case of write-only registers */
258 ASSERT(0); /* test above should prevent this possibility */
261 /* perform transfer & return */
265 memcpy(dest
, ((unsigned_1
*) &result
) + reg_byte
, nr_bytes
);
277 else if(addr
>= my_fifo_addr
&&
278 addr
< my_fifo_addr
+ sizeof(quadword
))
282 /* FIFO is not readable: return a word of zeroes */
283 memset(dest
, 0, nr_bytes
);
292 /* Handle a PKE read; return no. of bytes written */
295 pke_io_write_buffer(device
*me_
,
303 /* downcast to gather embedding pke_device struct */
304 struct pke_device
* me
= (struct pke_device
*) me_
;
306 /* find my address ranges */
307 address_word my_reg_start
=
308 (me
->pke_number
== 0) ? PKE0_REGISTER_WINDOW_START
: PKE1_REGISTER_WINDOW_START
;
309 address_word my_fifo_addr
=
310 (me
->pke_number
== 0) ? PKE0_FIFO_ADDR
: PKE1_FIFO_ADDR
;
312 /* enforce that an access does not span more than one quadword */
313 address_word low
= ADDR_TRUNC_QW(addr
);
314 address_word high
= ADDR_TRUNC_QW(addr
+ nr_bytes
- 1);
318 /* classify address & handle */
319 if((addr
>= my_reg_start
) && (addr
< my_reg_start
+ PKE_REGISTER_WINDOW_SIZE
))
322 int reg_num
= ADDR_TRUNC_QW(addr
- my_reg_start
) >> 4;
323 int reg_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside register bank */
328 input
[0] = input
[1] = input
[2] = input
[3] = 0;
330 /* write user-given bytes into input */
331 memcpy(((unsigned_1
*) &input
) + reg_byte
, src
, nr_bytes
);
333 /* handle writes to individual registers; clear `writeable' on error */
337 /* Order these tests from least to most overriding, in case
338 multiple bits are set. */
339 if(BIT_MASK_GET(input
[0], 2, 2)) /* STC bit */
341 /* clear a bunch of status bits */
342 PKE_REG_MASK_SET(me
, STAT
, PSS
, 0);
343 PKE_REG_MASK_SET(me
, STAT
, PFS
, 0);
344 PKE_REG_MASK_SET(me
, STAT
, PIS
, 0);
345 PKE_REG_MASK_SET(me
, STAT
, INT
, 0);
346 PKE_REG_MASK_SET(me
, STAT
, ER0
, 0);
347 PKE_REG_MASK_SET(me
, STAT
, ER1
, 0);
348 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
349 /* will allow resumption of possible stalled instruction */
351 if(BIT_MASK_GET(input
[0], 2, 2)) /* STP bit */
353 me
->flags
|= PKE_FLAG_PENDING_PSS
;
355 if(BIT_MASK_GET(input
[0], 1, 1)) /* FBK bit */
357 PKE_REG_MASK_SET(me
, STAT
, PFS
, 1);
359 if(BIT_MASK_GET(input
[0], 0, 0)) /* RST bit */
361 /* clear FIFO by skipping to word after PC: also
362 prevents re-execution attempt of possible stalled
364 me
->fifo_num_elements
= me
->fifo_pc
;
365 /* clear registers, flag, other state */
366 memset(me
->regs
, 0, sizeof(me
->regs
));
367 me
->fifo_qw_done
= 0;
374 /* copy bottom three bits */
375 BIT_MASK_SET(me
->regs
[PKE_REG_ERR
][0], 0, 2, BIT_MASK_GET(input
[0], 0, 2));
379 /* copy bottom sixteen bits */
380 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(input
[0], 0, 15));
381 /* reset MRK bit in STAT */
382 PKE_REG_MASK_SET(me
, STAT
, MRK
, 0);
385 /* handle common case of read-only registers */
386 /* PKE1-only registers - not really necessary to handle separately */
392 if(me
->pke_number
== 0)
395 /* PKE0 & PKE1 common registers*/
397 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
417 ASSERT(0); /* test above should prevent this possibility */
434 else if(addr
>= my_fifo_addr
&&
435 addr
< my_fifo_addr
+ sizeof(quadword
))
438 struct fifo_quadword
* fqw
;
439 int fifo_byte
= ADDR_OFFSET_QW(addr
); /* find byte-offset inside fifo quadword */
440 unsigned_4 dma_tag_present
= 0;
443 /* collect potentially-partial quadword in write buffer */
444 memcpy(((unsigned_1
*)& me
->fifo_qw_in_progress
) + fifo_byte
, src
, nr_bytes
);
445 /* mark bytes written */
446 for(i
= fifo_byte
; i
< fifo_byte
+ nr_bytes
; i
++)
447 BIT_MASK_SET(me
->fifo_qw_done
, i
, i
, 1);
449 /* return if quadword not quite written yet */
450 if(BIT_MASK_GET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1) !=
451 BIT_MASK_BTW(0, sizeof(quadword
)-1))
454 /* all done - process quadword after clearing flag */
455 BIT_MASK_SET(me
->fifo_qw_done
, 0, sizeof(quadword
)-1, 0);
457 /* ensure FIFO has enough elements */
458 if(me
->fifo_num_elements
== me
->fifo_buffer_size
)
461 int new_fifo_buffer_size
= me
->fifo_buffer_size
+ 20;
462 void* ptr
= realloc((void*) me
->fifo
, new_fifo_buffer_size
*sizeof(quadword
));
466 /* oops, cannot enlarge FIFO any more */
467 device_error(me_
, "Cannot enlarge FIFO buffer\n");
472 me
->fifo_buffer_size
= new_fifo_buffer_size
;
475 /* add new quadword at end of FIFO */
476 fqw
= & me
->fifo
[me
->fifo_num_elements
];
477 fqw
->word_class
[0] = fqw
->word_class
[1] =
478 fqw
->word_class
[2] = fqw
->word_class
[3] = wc_unknown
;
479 memcpy((void*) fqw
->data
, me
->fifo_qw_in_progress
, sizeof(quadword
));
480 ASSERT(sizeof(unsigned_4
) == 4);
481 PKE_MEM_READ((SIM_ADDR
) (me
->pke_number
== 0 ? DMA_D0_MADR
: DMA_D1_MADR
),
482 & fqw
->source_address
,
484 PKE_MEM_READ((SIM_ADDR
) (me
->pke_number
== 0 ? DMA_D0_PKTFLAG
: DMA_D1_PKTFLAG
),
490 /* lower two words are DMA tags */
491 fqw
->word_class
[0] = fqw
->word_class
[1] = wc_dma
;
495 me
->fifo_num_elements
++;
497 /* set FQC to "1" as FIFO is now not empty */
498 PKE_REG_MASK_SET(me
, STAT
, FQC
, 1);
510 /* Issue & swallow next PKE opcode if possible/available */
513 pke_issue(SIM_DESC sd
, struct pke_device
* me
)
515 struct fifo_quadword
* fqw
;
517 unsigned_4 cmd
, intr
, num
;
520 /* 1 -- test go / no-go for PKE execution */
522 /* switch on STAT:PSS if PSS-pending and in idle state */
523 if((PKE_REG_MASK_GET(me
, STAT
, PPS
) == PKE_REG_STAT_PPS_IDLE
) &&
524 (me
->flags
& PKE_FLAG_PENDING_PSS
) != 0)
526 me
->flags
&= ~PKE_FLAG_PENDING_PSS
;
527 PKE_REG_MASK_SET(me
, STAT
, PSS
, 1);
530 /* check for stall/halt control bits */
531 if(PKE_REG_MASK_GET(me
, STAT
, PFS
) ||
532 PKE_REG_MASK_GET(me
, STAT
, PSS
) || /* note special treatment below */
533 /* PEW bit not a reason to keep stalling - it's re-checked below */
534 /* PGW bit not a reason to keep stalling - it's re-checked below */
535 /* maskable stall controls: ER0, ER1, PIS */
536 (PKE_REG_MASK_GET(me
, STAT
, ER0
) && !PKE_REG_MASK_GET(me
, ERR
, ME0
)) ||
537 (PKE_REG_MASK_GET(me
, STAT
, ER1
) && !PKE_REG_MASK_GET(me
, ERR
, ME1
)) ||
538 (PKE_REG_MASK_GET(me
, STAT
, PIS
) && !PKE_REG_MASK_GET(me
, ERR
, MII
)))
540 /* try again next cycle; no state change */
544 /* confirm availability of new quadword of PKE instructions */
545 if(me
->fifo_num_elements
<= me
->fifo_pc
)
549 /* 2 -- fetch PKE instruction */
551 /* skip over DMA tag, if present */
552 pke_pc_advance(me
, 0);
554 /* "fetch" instruction quadword and word */
555 fqw
= & me
->fifo
[me
->fifo_pc
];
556 fw
= fqw
->data
[me
->qw_pc
];
558 /* store word in PKECODE register */
559 me
->regs
[PKE_REG_CODE
][0] = fw
;
562 /* 3 -- decode PKE instruction */
564 /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
565 so op-code is in top byte. */
566 intr
= BIT_MASK_GET(fw
, PKE_OPCODE_I_B
, PKE_OPCODE_I_E
);
567 cmd
= BIT_MASK_GET(fw
, PKE_OPCODE_CMD_B
, PKE_OPCODE_CMD_E
);
568 num
= BIT_MASK_GET(fw
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
569 imm
= BIT_MASK_GET(fw
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
573 /* set INT flag in STAT register */
574 PKE_REG_MASK_SET(me
, STAT
, INT
, 1);
575 /* XXX: send interrupt to 5900? */
579 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_DECODE
);
581 /* decode & execute */
582 if(IS_PKE_CMD(cmd
, PKENOP
))
583 pke_code_nop(me
, fw
);
584 else if(IS_PKE_CMD(cmd
, STCYCL
))
585 pke_code_stcycl(me
, fw
);
586 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, OFFSET
))
587 pke_code_offset(me
, fw
);
588 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, BASE
))
589 pke_code_base(me
, fw
);
590 else if(IS_PKE_CMD(cmd
, ITOP
))
591 pke_code_itop(me
, fw
);
592 else if(IS_PKE_CMD(cmd
, STMOD
))
593 pke_code_stmod(me
, fw
);
594 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, MSKPATH3
))
595 pke_code_mskpath3(me
, fw
);
596 else if(IS_PKE_CMD(cmd
, PKEMARK
))
597 pke_code_pkemark(me
, fw
);
598 else if(IS_PKE_CMD(cmd
, FLUSHE
))
599 pke_code_flushe(me
, fw
);
600 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSH
))
601 pke_code_flush(me
, fw
);
602 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, FLUSHA
))
603 pke_code_flusha(me
, fw
);
604 else if(IS_PKE_CMD(cmd
, PKEMSCAL
))
605 pke_code_pkemscal(me
, fw
);
606 else if(IS_PKE_CMD(cmd
, PKEMSCNT
))
607 pke_code_pkemscnt(me
, fw
);
608 else if(me
->pke_number
== 1 && IS_PKE_CMD(cmd
, PKEMSCALF
))
609 pke_code_pkemscalf(me
, fw
);
610 else if(IS_PKE_CMD(cmd
, STMASK
))
611 pke_code_stmask(me
, fw
);
612 else if(IS_PKE_CMD(cmd
, STROW
))
613 pke_code_strow(me
, fw
);
614 else if(IS_PKE_CMD(cmd
, STCOL
))
615 pke_code_stcol(me
, fw
);
616 else if(IS_PKE_CMD(cmd
, MPG
))
617 pke_code_mpg(me
, fw
);
618 else if(IS_PKE_CMD(cmd
, DIRECT
))
619 pke_code_direct(me
, fw
);
620 else if(IS_PKE_CMD(cmd
, DIRECTHL
))
621 pke_code_directhl(me
, fw
);
622 else if(IS_PKE_CMD(cmd
, UNPACK
))
623 pke_code_unpack(me
, fw
);
624 /* ... no other commands ... */
626 pke_code_error(me
, fw
);
631 /* advance the PC by given number of data words; update STAT/FQC
632 field; assume FIFO is filled enough; classify passed-over words;
633 write FIFO trace line */
636 pke_pc_advance(struct pke_device
* me
, int num_words
)
639 struct fifo_quadword
* fq
= NULL
;
641 ASSERT(num_words
>= 0);
645 fq
= & me
->fifo
[me
->fifo_pc
];
647 /* skip over DMA tag words if present in word 0 or 1 */
648 if(fq
->word_class
[me
->qw_pc
] == wc_dma
)
650 /* skip by going around loop an extra time */
657 if(num
> 0) /* increment PC */
659 /* one word skipped */
662 /* point to next word */
669 /* trace the consumption of this FIFO quadword */
670 if(me
->fifo_trace_file
!= NULL
)
672 /* assert complete classification */
673 ASSERT(fq
->word_class
[3] != wc_unknown
);
674 ASSERT(fq
->word_class
[2] != wc_unknown
);
675 ASSERT(fq
->word_class
[1] != wc_unknown
);
676 ASSERT(fq
->word_class
[0] != wc_unknown
);
678 /* print trace record */
679 fprintf(me
->fifo_trace_file
,
680 "%d 0x%ux_%ux_%ux_%ux 0x%ux %c%c%c%c\n",
681 (me
->pke_number
== 0 ? 0 : 1),
682 (unsigned) fq
->data
[3], (unsigned) fq
->data
[2],
683 (unsigned) fq
->data
[1], (unsigned) fq
->data
[0],
684 (unsigned) fq
->source_address
,
685 fq
->word_class
[3], fq
->word_class
[2],
686 fq
->word_class
[1], fq
->word_class
[0]);
689 /* XXX: zap old entries in FIFO */
691 } /* next quadword */
693 } /* eat num words */
694 while(num
> 0 || skipped
);
696 /* clear FQC if FIFO is now empty */
697 if(me
->fifo_num_elements
== me
->fifo_pc
)
699 PKE_REG_MASK_SET(me
, STAT
, FQC
, 0);
701 else /* annote the word where the PC lands as an PKEcode */
703 ASSERT(fq
->word_class
[me
->qw_pc
] == wc_pkecode
||
704 fq
->word_class
[me
->qw_pc
] == wc_unknown
);
705 fq
->word_class
[me
->qw_pc
] = wc_pkecode
;
711 /* Return pointer to FIFO quadword containing given operand# in FIFO.
712 `operand_num' starts at 1. Return pointer to operand word in last
713 argument, if non-NULL. If FIFO is not full enough, return 0.
714 Signal an ER0 indication upon skipping a DMA tag. */
716 struct fifo_quadword
*
717 pke_pc_fifo(struct pke_device
* me
, int operand_num
, unsigned_4
** operand
)
719 int num
= operand_num
;
720 int new_qw_pc
, new_fifo_pc
;
721 struct fifo_quadword
* fq
= NULL
;
725 /* snapshot current pointers */
726 new_fifo_pc
= me
->fifo_pc
;
727 new_qw_pc
= me
->qw_pc
;
731 /* one word skipped */
734 /* point to next word */
742 /* check for FIFO underflow */
743 if(me
->fifo_num_elements
== new_fifo_pc
)
749 /* skip over DMA tag words if present in word 0 or 1 */
750 fq
= & me
->fifo
[new_fifo_pc
];
751 if(fq
->word_class
[new_qw_pc
] == wc_dma
)
753 /* mismatch error! */
754 PKE_REG_MASK_SET(me
, STAT
, ER0
, 1);
755 /* skip by going around loop an extra time */
761 /* return pointer to operand word itself */
764 *operand
= & fq
->data
[new_qw_pc
];
766 /* annote the word where the pseudo lands as an PKE operand */
767 ASSERT(fq
->word_class
[new_qw_pc
] == wc_pkedata
||
768 fq
->word_class
[new_qw_pc
] == wc_unknown
);
769 fq
->word_class
[new_qw_pc
] = wc_pkedata
;
776 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
777 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
778 them as an error (ER0). */
781 pke_pc_operand(struct pke_device
* me
, int operand_num
)
783 unsigned_4
* operand
= NULL
;
784 struct fifo_quadword
* fifo_operand
;
786 fifo_operand
= pke_pc_fifo(me
, operand_num
, & operand
);
788 if(fifo_operand
== NULL
)
789 ASSERT(operand
== NULL
); /* pke_pc_fifo() ought leave it untouched */
795 /* Return a bit-field extract of given operand# in FIFO, and its
796 source-addr. `bit_offset' starts at 0, referring to LSB after PKE
797 instruction word. Width must be >0, <=32. Assume FIFO is full
798 enough. Skip over DMA tags, but mark them as an error (ER0). */
801 pke_pc_operand_bits(struct pke_device
* me
, int bit_offset
, int bit_width
, unsigned_4
* source_addr
)
803 unsigned_4
* word
= NULL
;
805 struct fifo_quadword
* fifo_operand
;
807 /* find operand word with bitfield */
808 fifo_operand
= pke_pc_fifo(me
, (bit_offset
/ 32) + 1, &word
);
811 /* extract bitfield from word */
812 value
= BIT_MASK_GET(*word
, bit_offset
% 32, bit_width
);
814 /* extract source addr from fifo word */
815 *source_addr
= fifo_operand
->source_address
;
824 /* Write a bunch of bytes into simulator memory. Store the given source address into the
825 PKE sourceaddr tracking word. */
827 pke_track_write(struct pke_device
* me
, const void* src
, int len
,
828 address_word dest
, unsigned_4 sourceaddr
)
831 unsigned_4 no_sourceaddr
= 0;
833 /* write srcaddr into PKE srcaddr tracking */
835 (SIM_ADDR
) (me
->pke_number
== 0) ? PKE0_SRCADDR
: PKE1_SRCADDR
,
836 (void*) & sourceaddr
,
839 /* write bytes into simulator */
845 /* clear srcaddr from PKE srcaddr tracking */
847 (SIM_ADDR
) (me
->pke_number
== 0) ? PKE0_SRCADDR
: PKE1_SRCADDR
,
848 (void*) & no_sourceaddr
,
855 /* check for stall conditions on indicated devices (path* only on PKE1), do not change status
856 return 0 iff no stall */
858 pke_check_stall(struct pke_device
* me
, enum pke_check_target what
)
861 unsigned_4 cop2_stat
, gpuif_stat
;
863 /* read status words */
865 (SIM_ADDR
) (GIF_REG_STAT
),
866 (void*) & gpuif_stat
,
870 (SIM_ADDR
) (COP2_REG_STAT_ADDR
),
877 if(me
->pke_number
== 0)
878 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS0_B
, COP2_REG_STAT_VBS0_E
);
879 else /* if(me->pke_number == 1) */
880 any_stall
= BIT_MASK_GET(cop2_stat
, COP2_REG_STAT_VBS1_B
, COP2_REG_STAT_VBS1_E
);
882 else if(what
== chk_path1
) /* VU -> GPUIF */
884 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 1)
887 else if(what
== chk_path2
) /* PKE -> GPUIF */
889 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 2)
892 else if(what
== chk_path3
) /* DMA -> GPUIF */
894 if(BIT_MASK_GET(gpuif_stat
, GPUIF_REG_STAT_APATH_B
, GPUIF_REG_STAT_APATH_E
) == 3)
903 /* any stall reasons? */
908 /* flip the DBF bit; recompute TOPS, ITOP & TOP */
910 pke_flip_dbf(struct pke_device
* me
)
913 PKE_REG_MASK_SET(me
, DBF
, DF
,
914 PKE_REG_MASK_GET(me
, DBF
, DF
) ? 0 : 1);
915 PKE_REG_MASK_SET(me
, STAT
, DBF
, PKE_REG_MASK_GET(me
, DBF
, DF
));
916 /* compute new TOPS */
917 PKE_REG_MASK_SET(me
, TOPS
, TOPS
,
918 (PKE_REG_MASK_GET(me
, BASE
, BASE
) +
919 (PKE_REG_MASK_GET(me
, DBF
, DF
) *
920 PKE_REG_MASK_GET(me
, OFST
, OFFSET
))));
921 /* compute new ITOP and TOP */
922 PKE_REG_MASK_SET(me
, ITOP
, ITOP
,
923 PKE_REG_MASK_GET(me
, ITOPS
, ITOPS
));
924 PKE_REG_MASK_SET(me
, TOP
, TOP
,
925 PKE_REG_MASK_GET(me
, TOPS
, TOPS
));
930 /* PKEcode handler functions -- responsible for checking and
931 confirming old stall conditions, executing pkecode, updating PC and
932 status registers -- may assume being run on correct PKE unit */
935 pke_code_nop(struct pke_device
* me
, unsigned_4 pkecode
)
938 pke_pc_advance(me
, 1);
939 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
944 pke_code_stcycl(struct pke_device
* me
, unsigned_4 pkecode
)
946 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
947 /* copy immediate value into CYCLE reg */
948 me
->regs
[PKE_REG_CYCLE
][0] = imm
;
950 pke_pc_advance(me
, 1);
951 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
956 pke_code_offset(struct pke_device
* me
, unsigned_4 pkecode
)
958 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
959 /* copy 10 bits to OFFSET field */
960 PKE_REG_MASK_SET(me
, OFST
, OFFSET
, BIT_MASK_GET(imm
, 0, 9));
962 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
963 /* clear other DBF bit */
964 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
965 /* set TOPS = BASE */
966 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
968 pke_pc_advance(me
, 1);
969 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
974 pke_code_base(struct pke_device
* me
, unsigned_4 pkecode
)
976 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
977 /* copy 10 bits to BASE field */
978 PKE_REG_MASK_SET(me
, BASE
, BASE
, BIT_MASK_GET(imm
, 0, 9));
980 PKE_REG_MASK_SET(me
, DBF
, DF
, 0);
981 /* clear other DBF bit */
982 PKE_REG_MASK_SET(me
, STAT
, DBF
, 0);
983 /* set TOPS = BASE */
984 PKE_REG_MASK_SET(me
, TOPS
, TOPS
, PKE_REG_MASK_GET(me
, BASE
, BASE
));
986 pke_pc_advance(me
, 1);
987 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
992 pke_code_itop(struct pke_device
* me
, unsigned_4 pkecode
)
994 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
995 /* copy 10 bits to ITOPS field */
996 PKE_REG_MASK_SET(me
, ITOPS
, ITOPS
, BIT_MASK_GET(imm
, 0, 9));
998 pke_pc_advance(me
, 1);
999 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1004 pke_code_stmod(struct pke_device
* me
, unsigned_4 pkecode
)
1006 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1007 /* copy 2 bits to MODE register */
1008 PKE_REG_MASK_SET(me
, MODE
, MDE
, BIT_MASK_GET(imm
, 0, 2));
1010 pke_pc_advance(me
, 1);
1011 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1016 pke_code_mskpath3(struct pke_device
* me
, unsigned_4 pkecode
)
1019 /* XXX: no easy interface toward GPUIF for this purpose */
1024 pke_code_pkemark(struct pke_device
* me
, unsigned_4 pkecode
)
1026 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1027 /* copy 16 bits to MARK register */
1028 PKE_REG_MASK_SET(me
, MARK
, MARK
, BIT_MASK_GET(imm
, 0, 15));
1029 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1030 PKE_REG_MASK_SET(me
, STAT
, MRK
, 1);
1032 pke_pc_advance(me
, 1);
1033 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1038 pke_code_flushe(struct pke_device
* me
, unsigned_4 pkecode
)
1040 /* compute next PEW bit */
1041 if(pke_check_stall(me
, chk_vu
))
1044 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1045 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1046 /* try again next cycle */
1051 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1052 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1053 pke_pc_advance(me
, 1);
1059 pke_code_flush(struct pke_device
* me
, unsigned_4 pkecode
)
1061 int something_busy
= 0;
1063 /* compute next PEW, PGW bits */
1064 if(pke_check_stall(me
, chk_vu
))
1067 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1070 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1073 if(pke_check_stall(me
, chk_path1
) ||
1074 pke_check_stall(me
, chk_path2
))
1077 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1080 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1085 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1086 /* try again next cycle */
1091 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1092 pke_pc_advance(me
, 1);
1098 pke_code_flusha(struct pke_device
* me
, unsigned_4 pkecode
)
1100 int something_busy
= 0;
1102 /* compute next PEW, PGW bits */
1103 if(pke_check_stall(me
, chk_vu
))
1106 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1109 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1112 if(pke_check_stall(me
, chk_path1
) ||
1113 pke_check_stall(me
, chk_path2
) ||
1114 pke_check_stall(me
, chk_path3
))
1117 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1120 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1124 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1125 /* try again next cycle */
1130 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1131 pke_pc_advance(me
, 1);
1137 pke_code_pkemscal(struct pke_device
* me
, unsigned_4 pkecode
)
1139 /* compute next PEW bit */
1140 if(pke_check_stall(me
, chk_vu
))
1143 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1144 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1145 /* try again next cycle */
1150 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1153 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1155 /* flip DBF on PKE1 */
1156 if(me
->pke_number
== 1)
1159 /* compute new PC for VU */
1160 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1161 /* write new PC; callback function gets VU running */
1163 (SIM_ADDR
) (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1165 sizeof(unsigned_4
));
1168 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1169 pke_pc_advance(me
, 1);
1176 pke_code_pkemscnt(struct pke_device
* me
, unsigned_4 pkecode
)
1178 /* compute next PEW bit */
1179 if(pke_check_stall(me
, chk_vu
))
1182 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1183 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1184 /* try again next cycle */
1191 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1193 /* flip DBF on PKE1 */
1194 if(me
->pke_number
== 1)
1199 (SIM_ADDR
) (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1201 sizeof(unsigned_4
));
1203 /* rewrite new PC; callback function gets VU running */
1205 (SIM_ADDR
) (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1207 sizeof(unsigned_4
));
1210 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1211 pke_pc_advance(me
, 1);
1217 pke_code_pkemscalf(struct pke_device
* me
, unsigned_4 pkecode
)
1219 int something_busy
= 0;
1221 /* compute next PEW, PGW bits */
1222 if(pke_check_stall(me
, chk_vu
))
1225 PKE_REG_MASK_SET(me
, STAT
, PEW
, 1);
1228 PKE_REG_MASK_SET(me
, STAT
, PEW
, 0);
1231 if(pke_check_stall(me
, chk_path1
) ||
1232 pke_check_stall(me
, chk_path2
) ||
1233 pke_check_stall(me
, chk_path3
))
1236 PKE_REG_MASK_SET(me
, STAT
, PGW
, 1);
1239 PKE_REG_MASK_SET(me
, STAT
, PGW
, 0);
1244 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1245 /* try again next cycle */
1250 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1252 /* flip DBF on PKE1 */
1253 if(me
->pke_number
== 1)
1256 /* compute new PC for VU */
1257 vu_pc
= BIT_MASK_GET(imm
, 0, 15);
1258 /* write new PC; callback function gets VU running */
1260 (SIM_ADDR
) (me
->pke_number
== 0 ? VU0_CIA
: VU1_CIA
),
1262 sizeof(unsigned_4
));
1265 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1266 pke_pc_advance(me
, 1);
1272 pke_code_stmask(struct pke_device
* me
, unsigned_4 pkecode
)
1274 /* check that FIFO has one more word for STMASK operand */
1277 mask
= pke_pc_operand(me
, 1);
1280 /* "transferring" operand */
1281 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1284 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1286 /* fill the register */
1287 PKE_REG_MASK_SET(me
, MASK
, MASK
, *mask
);
1290 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1293 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1294 pke_pc_advance(me
, 2);
1298 /* need to wait for another word */
1299 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1300 /* try again next cycle */
1306 pke_code_strow(struct pke_device
* me
, unsigned_4 pkecode
)
1308 /* check that FIFO has four more words for STROW operand */
1309 unsigned_4
* last_op
;
1311 last_op
= pke_pc_operand(me
, 4);
1314 /* "transferring" operand */
1315 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1318 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1320 /* copy ROW registers: must all exist if 4th operand exists */
1321 me
->regs
[PKE_REG_R0
][0] = * pke_pc_operand(me
, 1);
1322 me
->regs
[PKE_REG_R1
][0] = * pke_pc_operand(me
, 2);
1323 me
->regs
[PKE_REG_R2
][0] = * pke_pc_operand(me
, 3);
1324 me
->regs
[PKE_REG_R3
][0] = * pke_pc_operand(me
, 4);
1327 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1330 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1331 pke_pc_advance(me
, 5);
1335 /* need to wait for another word */
1336 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1337 /* try again next cycle */
1343 pke_code_stcol(struct pke_device
* me
, unsigned_4 pkecode
)
1345 /* check that FIFO has four more words for STCOL operand */
1346 unsigned_4
* last_op
;
1348 last_op
= pke_pc_operand(me
, 4);
1351 /* "transferring" operand */
1352 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1355 PKE_REG_MASK_SET(me
, NUM
, NUM
, 1);
1357 /* copy COL registers: must all exist if 4th operand exists */
1358 me
->regs
[PKE_REG_C0
][0] = * pke_pc_operand(me
, 1);
1359 me
->regs
[PKE_REG_C1
][0] = * pke_pc_operand(me
, 2);
1360 me
->regs
[PKE_REG_C2
][0] = * pke_pc_operand(me
, 3);
1361 me
->regs
[PKE_REG_C3
][0] = * pke_pc_operand(me
, 4);
1364 PKE_REG_MASK_SET(me
, NUM
, NUM
, 0);
1367 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1368 pke_pc_advance(me
, 5);
1372 /* need to wait for another word */
1373 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1374 /* try again next cycle */
1380 pke_code_mpg(struct pke_device
* me
, unsigned_4 pkecode
)
1382 unsigned_4
* last_mpg_word
;
1383 int num
= BIT_MASK_GET(pkecode
, PKE_OPCODE_NUM_B
, PKE_OPCODE_NUM_E
);
1384 int imm
= BIT_MASK_GET(pkecode
, PKE_OPCODE_IMM_B
, PKE_OPCODE_IMM_E
);
1386 /* map zero to max+1 */
1387 if(num
==0) num
=0x100;
1389 /* check that FIFO has a few more words for MPG operand */
1390 last_mpg_word
= pke_pc_operand(me
, num
*2); /* num: number of 64-bit words */
1391 if(last_mpg_word
!= NULL
)
1393 /* perform implied FLUSHE */
1394 if(pke_check_stall(me
, chk_vu
))
1399 /* "transferring" operand */
1400 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_XFER
);
1402 /* transfer VU instructions, one word per iteration */
1403 for(i
=0; i
<num
*2; i
++)
1405 address_word vu_addr_base
, vu_addr
;
1406 address_word vutrack_addr_base
, vutrack_addr
;
1407 unsigned_4
* operand
;
1408 struct fifo_quadword
* fq
= pke_pc_fifo(me
, num
, & operand
);
1411 PKE_REG_MASK_SET(me
, NUM
, NUM
, (num
*2 - i
) / 2);
1413 /* imm: in 64-bit units for MPG instruction */
1414 /* VU*_MEM0 : instruction memory */
1415 vu_addr_base
= (me
->pke_number
== 0) ?
1416 VU0_MEM0_WINDOW_START
: VU0_MEM0_WINDOW_START
;
1417 vu_addr
= vu_addr_base
+ (imm
*2) + i
;
1419 /* VU*_MEM0_TRACK : source-addr tracking table */
1420 vutrack_addr_base
= (me
->pke_number
== 0) ?
1421 VU0_MEM0_SRCADDR_START
: VU1_MEM0_SRCADDR_START
;
1422 vutrack_addr
= vu_addr_base
+ (imm
*2) + i
;
1424 /* write data into VU memory */
1425 pke_track_write(me
, operand
, sizeof(unsigned_4
),
1426 vu_addr
, fq
->source_address
);
1428 /* write srcaddr into VU srcaddr tracking table */
1430 (SIM_ADDR
) vutrack_addr
,
1431 (void*) & fq
->source_address
,
1432 sizeof(unsigned_4
));
1433 } /* VU xfer loop */
1436 ASSERT(PKE_REG_MASK_GET(me
, NUM
, NUM
) == 0);
1439 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1440 pke_pc_advance(me
, 1 + num
*2);
1445 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_STALL
);
1446 /* retry this instruction next clock */
1448 } /* if FIFO full enough */
1451 /* need to wait for another word */
1452 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_WAIT
);
1453 /* retry this instruction next clock */
/* Handle the DIRECT PKEcode: transfer IMM 128-bit quadwords from the PKE
   FIFO to the GPUIF PATH2 FIFO, collecting four 32-bit words at a time.

   me: PKE device instance
   pkecode: the 32-bit PKEcode word being executed

   NOTE(review): several source lines appear to have been lost in
   extraction (the `int i;` / `quadword fifo_data;` declarations, brace
   lines, the `else`, and the `if((i%4) == 3)` guard implied by the
   "write to GPUIF FIFO only with full word" comment).  Code tokens below
   are untouched; only comments are added.  */
pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
  /* check that FIFO has a few more words for DIRECT operand */
  unsigned_4* last_direct_word;
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);

  /* map zero to max+1 */
  if(imm==0) imm=0x10000;

  last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit quadwords (4 FIFO words each) */
  if(last_direct_word != NULL)
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* transfer GPUIF quadwords, one word per iteration */
      for(i=0; i<imm*4; i++)
          unsigned_4* operand;
          /* NOTE(review): passing `num` as the operand index looks
             suspicious -- the loop index would be expected here; confirm
             against the unabridged source */
          struct fifo_quadword* fq = pke_pc_fifo(me, num, &operand);

          /* collect word into quadword */
          fifo_data[i%4] = *operand;

          /* write to GPUIF FIFO only with full word */
              address_word gpuif_fifo = GIF_PATH2_FIFO_ADDR +(i/4);
              pke_track_write(me, fifo_data, sizeof(quadword),
                              (SIM_ADDR) gpuif_fifo, fq->source_address);
          } /* write collected quadword */
      } /* GPUIF xfer loop */

      /* done: go idle, skip PKEcode + operand words */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + imm*4);
  } /* if FIFO full enough */

  /* NOTE(review): `else` line appears dropped here */
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
1512 pke_code_directhl(struct pke_device
* me
, unsigned_4 pkecode
)
1514 /* treat the same as DIRECTH */
1515 pke_code_direct(me
, pkecode
);
/* Handle the UNPACK PKEcode family: expand packed vector data from the
   FIFO into VU data memory, applying the CYCLE (CL/WL), MASK and MODE
   (STMOD) register effects per vector.

   me: PKE device instance; me->pke_number selects VU0 vs VU1 windows
   pkecode: the 32-bit PKEcode word; cmd bits encode vn/vl shape, m mask
   flag; imm bits 0..9 give the VU address, bit 15 (r) the TOPS double-
   buffer flag, bit 14 (sx) sign extension.

   NOTE(review): this block lost several source lines to extraction
   (brace/else lines, the `do` of the do/while, `switch(mask_op)`,
   declarations of `i`, `vector_num`, `bitoffset`, `operand`).  Comments
   below mark each apparent gap; code tokens are untouched.  */
pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
  /* decode PKEcode fields */
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */
  short vl = BIT_MASK_GET(cmd, 0, 1);
  int m = BIT_MASK_GET(cmd, 4, 4); /* mask-register enable */
  short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */
  short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
  int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */
  int sx = BIT_MASK_GET(imm, 14, 14);

  int n, num_operands;
  unsigned_4* last_operand_word;

  /* map zero to max+1 */
  if(num==0) num=0x100;

  /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
  /* NOTE(review): divides by wl before wl==0 is remapped (further below,
     inside the transfer loop) -- potential division by zero when
     CYCLE.WL is 0; verify ordering against the unabridged source */
  n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
  num_operands = ((32 >> vl) * (vn+1) * n)/32;

  /* confirm that FIFO has enough words in it */
  last_operand_word = pke_pc_operand(me, num_operands);
  if(last_operand_word != NULL)
      address_word vu_addr_base;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* don't check whether VU is idle */

      /* compute VU address base from MEM1 window for this PKE */
      if(me->pke_number == 0)
        vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
      /* NOTE(review): `else` line appears dropped here */
        vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
      /* r flag: address is relative to TOPS (double-buffer base) */
      if(r) vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);

      /* NUM counts output vectors remaining */
      PKE_REG_MASK_SET(me, NUM, NUM, num);

      /* transfer given number of vectors */
      vector_num = 0; /* output vector number being processed */
      /* NOTE(review): `do {` of the do/while loop appears dropped here */
          quadword vu_old_data;   /* prior VU memory contents at vu_addr */
          quadword vu_new_data;   /* word to be written back */
          quadword unpacked_data; /* raw unpacked input vector */
          address_word vu_addr;   /* destination, as bytes in R5900 memory */
          unsigned_4 source_addr = 0;

          /* account for one more output vector */
          PKE_REG_MASK_SET(me, NUM, NUM,
                           PKE_REG_MASK_GET(me, NUM, NUM) - 1);

          /* compute VU destination address, as bytes in R5900 memory */

          /* map zero to max+1 */
          if(wl == 0) wl = 0x0100;
          /* cyclic (CL/WL) addressing */
          vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
          /* NOTE(review): non-cyclic alternative; its guarding condition
             (likely cl >= wl) appears dropped */
          vu_addr = vu_addr_base + 16*vector_num;

          /* XXX: can vu_addr overflow? */

          /* read old VU data word at address */
          sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));

          /* For cyclic unpack, next operand quadword may come from instruction stream */
          if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */
              /* clear operand - used only in a "indeterminate" state */
              for(i = 0; i < 4; i++)
                unpacked_data[i] = 0;

          /* NOTE(review): `else` branch header appears dropped here */
              /* compute packed vector dimensions */
              int vectorbits, unitbits;

              if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
                  unitbits = (32 >> vl);
                  vectorbits = unitbits * (vn+1);
              else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
              /* NOTE(review): V4_5 dimension assignments appear dropped */
              else /* illegal unpack variant */
                  /* treat as illegal instruction */
                  pke_code_error(me, pkecode);

              /* loop over columns */
              for(i=0; i<=vn; i++)
                  /* offset in bits in current operand word */
                  /* NOTE(review): LHS (likely `bitoffset =`) appears dropped */
                  (vector_num * vectorbits) + (i * unitbits); /* # of bits from PKEcode */

                  /* last unit of V4_5 is only one bit wide */
                  if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
                  /* NOTE(review): body (likely `unitbits = 1;`) appears dropped */

                  /* fetch bitfield operand */
                  operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr);

                  /* selectively sign-extend; not for V4_5 1-bit value */
                  if(sx && unitbits > 0)
                    unpacked_data[i] = SEXT32(operand, unitbits-1);
                  /* NOTE(review): `else` line appears dropped */
                    unpacked_data[i] = operand;
              } /* unpack word from instruction operand */

          /* compute replacement word */
          if(m) /* use mask register? */
              /* compute index into mask register for this word */
              int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */

              /* NOTE(review): bound 3 covers columns 0..2 only, leaving
                 vu_new_data[3] unset on this path -- likely should be
                 i<4; verify against the unabridged source */
              for(i=0; i<3; i++) /* loop over columns */
                  int mask_op = PKE_MASKREG_GET(me, mask_index, i);
                  unsigned_4* masked_value = NULL;
                  unsigned_4 zero = 0;

                  /* NOTE(review): `switch(mask_op)` header appears dropped */
                  case PKE_MASKREG_INPUT:
                    /* for vn == 0, all columns are copied from column 0 */
                      masked_value = & unpacked_data[0];
                    /* NOTE(review): conditions selecting among the three
                       assignments in this case appear dropped */
                      masked_value = & zero; /* arbitrary data: undefined in spec */
                      masked_value = & unpacked_data[i];

                  case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
                    masked_value = & me->regs[PKE_REG_R0 + i][0];

                  case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
                    masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];

                  case PKE_MASKREG_NOTHING:
                    /* "write inhibit" by re-copying old data */
                    masked_value = & vu_old_data[i];

                    /* no other cases possible */

                  /* copy masked value for column */
                  vu_new_data[i] = *masked_value;
              } /* loop over columns */
          /* NOTE(review): `else` line appears dropped here */
              /* no mask - just copy over entire unpacked quadword */
              memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));

          /* process STMOD register for accumulation operations */
          switch(PKE_REG_MASK_GET(me, MODE, MDE))
            case PKE_MODE_ADDROW: /* add row registers to output data */
                /* NOTE(review): enclosing for-loop over columns appears dropped */
                /* exploit R0..R3 contiguity */
                vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];

            case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
                /* NOTE(review): enclosing for-loop over columns appears dropped */
                /* exploit R0..R3 contiguity */
                vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
                me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];

            case PKE_MODE_INPUT: /* pass data through */

          /* write replacement word */
          pke_track_write(me, vu_new_data, sizeof(vu_new_data),
                          (SIM_ADDR) vu_addr, source_addr);

          /* next vector please */
      } /* vector transfer loop */
      while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);

      /* done: go idle, skip PKEcode + all operand words */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + num_operands);
  } /* PKE FIFO full enough */

  /* NOTE(review): `else` line appears dropped here */
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
1752 pke_code_error(struct pke_device
* me
, unsigned_4 pkecode
)
1754 /* set ER1 flag in STAT register */
1755 PKE_REG_MASK_SET(me
, STAT
, ER1
, 1);
1756 /* advance over faulty word */
1757 PKE_REG_MASK_SET(me
, STAT
, PPS
, PKE_REG_STAT_PPS_IDLE
);
1758 pke_pc_advance(me
, 1);