/* Copyright (C) 1998, Cygnus Solutions */

/* Debugging PKE? */
#define PKE_DEBUG

#include <stdlib.h>
#include <string.h> /* memcpy, memset */
#include "sky-pke.h"
#include "sky-dma.h"
#include "sim-bits.h"
#include "sim-assert.h"
#include "sky-vu0.h"
#include "sky-vu1.h"
#include "sky-gpuif.h"


/* Imported functions */

void device_error (device *me, char* message);  /* device.c */


/* Internal function declarations */

static int pke_io_read_buffer(device*, void*, int, address_word,
                              unsigned, sim_cpu*, sim_cia);
static int pke_io_write_buffer(device*, const void*, int, address_word,
                               unsigned, sim_cpu*, sim_cia);
static void pke_issue(struct pke_device*);
static void pke_pc_advance(struct pke_device*, int num_words);
static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
static unsigned_4 pke_pc_operand_bits(struct pke_device*, int bit_offset,
                                      int bit_width, unsigned_4* sourceaddr);
static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
                                         unsigned_4** operand);
static int pke_track_write(struct pke_device*, const void* src, int len,
                           address_word dest, unsigned_4 sourceaddr);
static void pke_attach(SIM_DESC sd, struct pke_device* me);
enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
static void pke_flip_dbf(struct pke_device* me);

/* PKEcode handlers */
static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);



/* Static data */

struct pke_device pke0_device =
{
  { "pke0", &pke_io_read_buffer, &pke_io_write_buffer },  /* device */
  0, 0,              /* ID, flags */
  {},                /* regs */
  {}, 0,             /* FIFO write buffer */
  NULL, 0, 0, NULL,  /* FIFO */
  0, 0               /* pc */
};


struct pke_device pke1_device =
{
  { "pke1", &pke_io_read_buffer, &pke_io_write_buffer },  /* device */
  1, 0,              /* ID, flags */
  {},                /* regs */
  {}, 0,             /* FIFO write buffer */
  NULL, 0, 0, NULL,  /* FIFO */
  0, 0               /* pc */
};



/* External functions */


/* Attach PKE addresses to main memory */

void
pke0_attach(SIM_DESC sd)
{
  pke_attach(sd, & pke0_device);
}

void
pke1_attach(SIM_DESC sd)
{
  pke_attach(sd, & pke1_device);
}



/* Issue a PKE instruction if possible */

void
pke0_issue(void)
{
  pke_issue(& pke0_device);
}

void
pke1_issue(void)
{
  pke_issue(& pke1_device);
}



/* Internal functions */


/* Attach PKE memory regions to simulator */

void
pke_attach(SIM_DESC sd, struct pke_device* me)
{
  /* register file */
  sim_core_attach (sd,
                   NULL,
                   0 /*level*/,
                   access_read_write,
                   0 /*space ???*/,
                   (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
                   PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* FIFO port */
  sim_core_attach (sd,
                   NULL,
                   0 /*level*/,
                   access_read_write,
                   0 /*space ???*/,
                   (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
                   sizeof(quadword) /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* source-addr tracking word */
  sim_core_attach (sd,
                   NULL,
                   0 /*level*/,
                   access_read_write,
                   0 /*space ???*/,
                   (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
                   sizeof(unsigned_4) /*nr_bytes*/,
                   0 /*modulo*/,
                   NULL,
                   zalloc(sizeof(unsigned_4)) /*buffer*/);
}
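

/* Debugging aid -- an illustrative sketch, not part of the original device
   model.  It dumps the PKE's FIFO bookkeeping, relying only on pke_device
   fields already used elsewhere in this file (pke_number, fifo_pc, qw_pc,
   fifo_num_elements) and on stdio; call it from any handler while tracing. */
#ifdef PKE_DEBUG
#include <stdio.h>

static void
pke_dump_fifo_state(struct pke_device* me)
{
  printf("pke%d: fifo_pc=%d qw_pc=%d fifo_num_elements=%d\n",
         me->pke_number, me->fifo_pc, me->qw_pc, me->fifo_num_elements);
}
#endif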



/* Handle a PKE read; return no. of bytes read */

int
pke_io_read_buffer(device *me_,
                   void *dest,
                   int space,
                   address_word addr,
                   unsigned nr_bytes,
                   sim_cpu *cpu,
                   sim_cia cia)
{
  /* downcast to the embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);  /* byte offset inside the register quadword */
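      /* For example (illustrative): a 4-byte read at my_reg_start + 0x24
         decodes to reg_num = 2 and reg_byte = 4, i.e. the second word of
         the third register's quadword-aligned slot. */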
      int readable = 1;
      quadword result;

      /* clear result */
      result[0] = result[1] = result[2] = result[3] = 0;

      /* handle reads to individual registers; clear `readable' on error */
      switch(reg_num)
        {
          /* handle common case of register reading, side-effect free */
          /* PKE1-only registers */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            readable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
        case PKE_REG_ERR:
        case PKE_REG_MARK:
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          result[0] = me->regs[reg_num][0];
          break;

          /* handle common case of write-only registers */
        case PKE_REG_FBRST:
          readable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform transfer & return */
      if(readable)
        {
          /* copy the bits */
          memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
          /* okay */
          return nr_bytes;
        }
      else
        {
          /* error */
          return 0;
        }

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */

      /* FIFO is not readable: return a word of zeroes */
      memset(dest, 0, nr_bytes);
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}


/* Handle a PKE write; return no. of bytes written */

int
pke_io_write_buffer(device *me_,
                    const void *src,
                    int space,
                    address_word addr,
                    unsigned nr_bytes,
                    sim_cpu *cpu,
                    sim_cia cia)
{
  /* downcast to the embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);  /* byte offset inside the register quadword */
      int writeable = 1;
      quadword input;

      /* clear input */
      input[0] = input[1] = input[2] = input[3] = 0;

      /* write user-given bytes into input */
      memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);

      /* handle writes to individual registers; clear `writeable' on error */
      switch(reg_num)
        {
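          /* FBRST control bits, as decoded by the tests below:
             bit 0 = RST (reset), bit 1 = FBK (force break),
             bit 2 = STP (stop), bit 3 = STC (stall cancel). */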
        case PKE_REG_FBRST:
          /* Order these tests from least to most overriding, in case
             multiple bits are set. */
          if(BIT_MASK_GET(input[0], 3, 3)) /* STC bit */
            {
              /* clear a bunch of status bits */
              PKE_REG_MASK_SET(me, STAT, PSS, 0);
              PKE_REG_MASK_SET(me, STAT, PFS, 0);
              PKE_REG_MASK_SET(me, STAT, PIS, 0);
              PKE_REG_MASK_SET(me, STAT, INT, 0);
              PKE_REG_MASK_SET(me, STAT, ER0, 0);
              PKE_REG_MASK_SET(me, STAT, ER1, 0);
              me->flags &= ~PKE_FLAG_PENDING_PSS;
              /* will allow resumption of possible stalled instruction */
            }
          if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */
            {
              me->flags |= PKE_FLAG_PENDING_PSS;
            }
          if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */
            {
              PKE_REG_MASK_SET(me, STAT, PFS, 1);
            }
          if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */
            {
              /* clear FIFO by skipping to word after PC: also
                 prevents re-execution attempt of possible stalled
                 instruction */
              me->fifo_num_elements = me->fifo_pc;
              /* clear registers, flags, other state */
              memset(me->regs, 0, sizeof(me->regs));
              me->fifo_qw_done = 0;
              me->flags = 0;
              me->qw_pc = 0;
            }
          break;

        case PKE_REG_ERR:
          /* copy bottom three bits */
          BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
          break;

        case PKE_REG_MARK:
          /* copy bottom sixteen bits */
          PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
          /* reset MRK bit in STAT */
          PKE_REG_MASK_SET(me, STAT, MRK, 0);
          break;

          /* handle common case of read-only registers */
          /* PKE1-only registers - not really necessary to handle separately */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            writeable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
          /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          writeable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform return */
      if(writeable)
        {
          /* okay */
          return nr_bytes;
        }
      else
        {
          /* error */
          return 0;
        }

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */
      struct fifo_quadword* fqw;
      int fifo_byte = ADDR_OFFSET_QW(addr);  /* byte offset inside the FIFO quadword */
      int i;

      /* collect potentially-partial quadword in write buffer */
      memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
      /* mark bytes written */
      for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
        BIT_MASK_SET(me->fifo_qw_done, i, i, 1);
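
      /* Illustrative: two 8-byte writes fill a quadword; after the first,
         fifo_qw_done == 0x00ff, after the second 0xffff, and only then is
         the completed quadword pushed onto the FIFO below. */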
      /* return if quadword not quite written yet */
      if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) !=
         BIT_MASK_BTW(0, sizeof(quadword)-1))
        return nr_bytes;

      /* all done - process quadword after clearing flag */
      BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0);

      /* ensure FIFO has enough elements */
      if(me->fifo_num_elements == me->fifo_buffer_size)
        {
          /* time to grow */
          int new_fifo_buffer_size = me->fifo_buffer_size + 20;
          void* ptr = realloc((void*) me->fifo,
                              new_fifo_buffer_size*sizeof(struct fifo_quadword));

          if(ptr == NULL)
            {
              /* oops, cannot enlarge FIFO any more */
              device_error(me_, "Cannot enlarge FIFO buffer\n");
              return 0;
            }

          me->fifo = ptr;
          me->fifo_buffer_size = new_fifo_buffer_size;
        }

      /* add new quadword at end of FIFO */
      fqw = & me->fifo[me->fifo_num_elements];
      memcpy((void*) fqw->data, me->fifo_qw_in_progress, sizeof(quadword));
      sim_read(CPU_STATE(cpu),
               (SIM_ADDR) (me->pke_number == 0 ? DMA_D0_SRCADDR : DMA_D1_SRCADDR),
               (void*) & fqw->source_address,
               sizeof(address_word));
      sim_read(CPU_STATE(cpu),
               (SIM_ADDR) (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
               (void*) & fqw->dma_tag_present,
               sizeof(unsigned_4));

      me->fifo_num_elements++;

      /* set FQC to "1" as FIFO is now not empty */
      PKE_REG_MASK_SET(me, STAT, FQC, 1);

      /* okay */
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}



/* Issue & swallow next PKE opcode if possible/available */

void
pke_issue(struct pke_device* me)
{
  struct fifo_quadword* fqw;
  unsigned_4 fw;
  unsigned_4 cmd, intr, num;
  unsigned_4 imm;

  /* 1 -- test go / no-go for PKE execution */

  /* switch on STAT:PSS if PSS is pending and the unit is idle */
  if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) &&
     (me->flags & PKE_FLAG_PENDING_PSS) != 0)
    {
      me->flags &= ~PKE_FLAG_PENDING_PSS;
      PKE_REG_MASK_SET(me, STAT, PSS, 1);
    }

  /* check for stall/halt control bits */
  if(PKE_REG_MASK_GET(me, STAT, PFS) ||
     PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment above */
     /* PEW bit not a reason to keep stalling - it's re-checked below */
     /* PGW bit not a reason to keep stalling - it's re-checked below */
     /* maskable stall controls: ER0, ER1, PIS */
     (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
     (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
     (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
    {
      /* try again next cycle; no state change */
      return;
    }

  /* confirm availability of a new quadword of PKE instructions */
  if(me->fifo_num_elements <= me->fifo_pc)
    return;


  /* 2 -- fetch PKE instruction */

  /* skip over DMA tag, if present */
  pke_pc_advance(me, 0);

  /* "fetch" instruction quadword and word */
  fqw = & me->fifo[me->fifo_pc];
  fw = fqw->data[me->qw_pc];

  /* store word in PKECODE register */
  me->regs[PKE_REG_CODE][0] = fw;


  /* 3 -- decode PKE instruction */

  /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
     so the opcode is in the top byte. */
  intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
  cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

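  /* Worked example (illustrative): fw = 0x01000404 decodes as intr = 0,
     cmd = 0x01, num = 0, imm = 0x0404. If STCYCL's opcode value is 0x01
     (as in the VIF encoding this models), pke_code_stcycl below loads that
     imm into CYCLE, whose CL and WL fields would then both read 4,
     assuming CL occupies imm's low byte and WL the next byte. */
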
  if(intr)
    {
      /* set INT flag in STAT register */
      PKE_REG_MASK_SET(me, STAT, INT, 1);
      /* XXX: how to send interrupt to R5900? */
    }

  /* decoding */
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);

  /* decode & execute */
  if(IS_PKE_CMD(cmd, PKENOP))
    pke_code_nop(me, fw);
  else if(IS_PKE_CMD(cmd, STCYCL))
    pke_code_stcycl(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
    pke_code_offset(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
    pke_code_base(me, fw);
  else if(IS_PKE_CMD(cmd, ITOP))
    pke_code_itop(me, fw);
  else if(IS_PKE_CMD(cmd, STMOD))
    pke_code_stmod(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
    pke_code_mskpath3(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMARK))
    pke_code_pkemark(me, fw);
  else if(IS_PKE_CMD(cmd, FLUSHE))
    pke_code_flushe(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
    pke_code_flush(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
    pke_code_flusha(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCAL))
    pke_code_pkemscal(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCNT))
    pke_code_pkemscnt(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
    pke_code_pkemscalf(me, fw);
  else if(IS_PKE_CMD(cmd, STMASK))
    pke_code_stmask(me, fw);
  else if(IS_PKE_CMD(cmd, STROW))
    pke_code_strow(me, fw);
  else if(IS_PKE_CMD(cmd, STCOL))
    pke_code_stcol(me, fw);
  else if(IS_PKE_CMD(cmd, MPG))
    pke_code_mpg(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECT))
    pke_code_direct(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECTHL))
    pke_code_directhl(me, fw);
  else if(IS_PKE_CMD(cmd, UNPACK))
    pke_code_unpack(me, fw);
  /* ... no other commands ... */
  else
    pke_code_error(me, fw);
}



/* Advance the PC by the given number of data words; update the STAT:FQC
   field; assume the FIFO is filled far enough.  A num_words of zero merely
   skips any DMA tag at the current PC. */

void
pke_pc_advance(struct pke_device* me, int num_words)
{
  int num = num_words;
  ASSERT(num_words >= 0);

  /* step over a DMA tag at the current position, so that even a
     zero-word advance leaves the PC on a PKEcode word */
  if(me->fifo_num_elements > me->fifo_pc)
    {
      struct fifo_quadword* fq0 = & me->fifo[me->fifo_pc];
      if(fq0->dma_tag_present && (me->qw_pc < 2))
        me->qw_pc = 2;  /* tag occupies words 0 and 1 */
    }

  while(num > 0)
    {
      struct fifo_quadword* fq;

      /* one word skipped */
      num --;

      /* point to next word */
      me->qw_pc ++;
      if(me->qw_pc == 4)
        {
          me->qw_pc = 0;
          me->fifo_pc ++;
        }

      /* skip over DMA tag words if present in word 0 or 1 */
      fq = & me->fifo[me->fifo_pc];
      if(fq->dma_tag_present && (me->qw_pc < 2))
        {
          /* skip by going around loop an extra time */
          num ++;
        }
    }

  /* clear FQC if FIFO is now empty */
  if(me->fifo_num_elements == me->fifo_pc)
    {
      PKE_REG_MASK_SET(me, STAT, FQC, 0);
    }
}
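
/* Illustrative trace: with fifo_pc == 0 and qw_pc == 3, advancing one word
   wraps to fifo_pc == 1, qw_pc == 0; if that quadword carries a DMA tag,
   words 0 and 1 are stepped over as well, leaving qw_pc == 2. */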



/* Return pointer to FIFO quadword containing given operand# in FIFO.
   `operand_num' starts at 1.  Return pointer to operand word in last
   argument, if non-NULL.  If FIFO is not full enough, return NULL.
   Signal an ER0 indication upon skipping a DMA tag. */

struct fifo_quadword*
pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
{
  int num = operand_num;
  int new_qw_pc, new_fifo_pc;
  struct fifo_quadword* operand_fifo = NULL;

  ASSERT(num > 0);

  /* snapshot current pointers */
  new_fifo_pc = me->fifo_pc;
  new_qw_pc = me->qw_pc;

  while(num > 0)
    {
      /* one word skipped */
      num --;

      /* point to next word */
      new_qw_pc ++;
      if(new_qw_pc == 4)
        {
          new_qw_pc = 0;
          new_fifo_pc ++;
        }

      /* check for FIFO underflow */
      if(me->fifo_num_elements == new_fifo_pc)
        {
          operand_fifo = NULL;
          break;
        }

      /* skip over DMA tag words if present in word 0 or 1 */
      operand_fifo = & me->fifo[new_fifo_pc];
      if(operand_fifo->dma_tag_present && (new_qw_pc < 2))
        {
          /* mismatch error! */
          PKE_REG_MASK_SET(me, STAT, ER0, 1);
          /* skip by going around loop an extra time */
          num ++;
        }
    }

  /* return pointer to operand word itself */
  if(operand_fifo != NULL)
    *operand = & operand_fifo->data[new_qw_pc];

  return operand_fifo;
}
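
/* Note (illustrative): operand numbering is 1-based and relative to the
   current PKEcode word, so operand 1 is the data word immediately following
   the instruction word, once any intervening DMA tag has been skipped. */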

/* Return pointer to given operand# in FIFO.  `operand_num' starts at 1.
   If FIFO is not full enough, return NULL.  Skip over DMA tags, but mark
   them as an error (ER0). */

unsigned_4*
pke_pc_operand(struct pke_device* me, int operand_num)
{
  unsigned_4* operand = NULL;
  struct fifo_quadword* fifo_operand;

  fifo_operand = pke_pc_fifo(me, operand_num, & operand);

  if(fifo_operand == NULL)
    ASSERT(operand == NULL); /* pke_pc_fifo() ought to leave it untouched */

  return operand;
}


/* Return a bit-field extract of given operand# in FIFO, and its
   source-addr.  `bit_offset' starts at 0, referring to LSB after PKE
   instruction word.  Width must be >0, <=32.  Assume FIFO is full
   enough.  Skip over DMA tags, but mark them as an error (ER0). */

unsigned_4
pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr)
{
  unsigned_4* word = NULL;
  unsigned_4 value;
  struct fifo_quadword* fifo_operand;

  /* find operand word with bitfield */
  fifo_operand = pke_pc_fifo(me, (bit_offset / 32) + 1, &word);
  ASSERT(word != NULL);

  /* extract bitfield from word */
  value = BIT_MASK_GET(*word, bit_offset % 32, (bit_offset % 32) + bit_width - 1);

  /* extract source addr from fifo word */
  *source_addr = fifo_operand->source_address;

  return value;
}




/* Write a bunch of bytes into simulator memory.  Store the given source
   address into the PKE sourceaddr tracking word. */
int
pke_track_write(struct pke_device* me, const void* src, int len,
                address_word dest, unsigned_4 sourceaddr)
{
  int rc;
  unsigned_4 no_sourceaddr = 0;

  /* write srcaddr into PKE srcaddr tracking */
  sim_write(NULL,
            (SIM_ADDR) ((me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR),
            (void*) & sourceaddr,
            sizeof(unsigned_4));

  /* write bytes into simulator */
  rc = sim_write(NULL,
                 (SIM_ADDR) dest,
                 (void*) src,
                 len);

  /* clear srcaddr from PKE srcaddr tracking */
  sim_write(NULL,
            (SIM_ADDR) ((me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR),
            (void*) & no_sourceaddr,
            sizeof(unsigned_4));

  return rc;
}
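
/* Typical call (illustrative), bracketing a store so that an observer
   reading PKE*_SRCADDR during the write can attribute the data:
     pke_track_write(me, operand, sizeof(unsigned_4),
                     vu_addr, fq->source_address);  */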

/* Check for stall conditions on the indicated devices (the path* checks
   apply only to PKE1); do not change status.  Return zero iff no stall. */
int
pke_check_stall(struct pke_device* me, enum pke_check_target what)
{
  int any_stall = 0;

  /* read GPUIF status word - commonly used */
  unsigned_4 gpuif_stat;
  sim_read(NULL,
           (SIM_ADDR) (GIF_REG_STAT),
           (void*) & gpuif_stat,
           sizeof(unsigned_4));

  /* perform checks */
  if(what == chk_vu)
    {
      ASSERT(0);
      /* XXX: have to check COP2 control register VBS0 / VBS1 bits */
    }
  else if(what == chk_path1) /* VU -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1)
        any_stall = 1;
    }
  else if(what == chk_path2) /* PKE -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2)
        any_stall = 1;
    }
  else if(what == chk_path3) /* DMA -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3)
        any_stall = 1;
    }
  else
    {
      /* invalid what */
      ASSERT(0);
    }

  /* any stall reasons? */
  return any_stall;
}


/* Flip the DBF bit; recompute TOPS, ITOP & TOP */
void
pke_flip_dbf(struct pke_device* me)
{
  /* flip DBF */
  PKE_REG_MASK_SET(me, DBF, DF,
                   PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
  PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
  /* compute new TOPS */
  PKE_REG_MASK_SET(me, TOPS, TOPS,
                   (PKE_REG_MASK_GET(me, BASE, BASE) +
                    (PKE_REG_MASK_GET(me, DBF, DF) *
                     PKE_REG_MASK_GET(me, OFST, OFFSET))));
  /* compute new ITOP and TOP */
  PKE_REG_MASK_SET(me, ITOP, ITOP,
                   PKE_REG_MASK_GET(me, ITOPS, ITOPS));
  PKE_REG_MASK_SET(me, TOP, TOP,
                   PKE_REG_MASK_GET(me, TOPS, TOPS));
}
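
/* Worked example (illustrative): with BASE:BASE = 0x100 and OFST:OFFSET =
   0x80, flipping DF from 0 to 1 yields TOPS = 0x100 + 1*0x80 = 0x180; the
   next flip restores TOPS = 0x100.  TOP and ITOP then snap to the new TOPS
   and ITOPS values. */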



/* PKEcode handler functions -- responsible for checking and confirming
   old stall conditions, executing the pkecode, and updating the PC and
   status registers -- may assume they are run on the correct PKE unit */

void
pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
{
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy immediate value into CYCLE reg */
  me->regs[PKE_REG_CYCLE][0] = imm;
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to OFFSET field */
  PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
  /* clear DBF bit */
  PKE_REG_MASK_SET(me, DBF, DF, 0);
  /* clear the STAT copy of the DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, 0);
  /* set TOPS = BASE */
  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_base(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to BASE field */
  PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
  /* clear DBF bit */
  PKE_REG_MASK_SET(me, DBF, DF, 0);
  /* clear the STAT copy of the DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, 0);
  /* set TOPS = BASE */
  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to ITOPS field */
  PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 2 bits to MODE register */
  PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 1));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
{
  ASSERT(0);
  /* XXX: no easy interface toward GPUIF for this purpose */
}


void
pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 16 bits to MARK register */
  PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
  /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
  PKE_REG_MASK_SET(me, STAT, MRK, 1);
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);

  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  /* go or no go */
  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      /* all idle */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);

  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2) ||
     pke_check_stall(me, chk_path3))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      /* all idle */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;
      int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* compute new PC for VU */
      vu_pc = BIT_MASK_GET(imm, 0, 15);
      /* write new PC; callback function gets VU running */
      sim_write(NULL,
                (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
                (void*) & vu_pc,
                sizeof(unsigned_4));

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}



void
pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;

      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* read old PC */
      sim_read(NULL,
               (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
               (void*) & vu_pc,
               sizeof(unsigned_4));

      /* rewrite the same PC; callback function gets VU running */
      sim_write(NULL,
                (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
                (void*) & vu_pc,
                sizeof(unsigned_4));

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);

  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2) ||
     pke_check_stall(me, chk_path3))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  /* go or no go */
  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;
      int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* compute new PC for VU */
      vu_pc = BIT_MASK_GET(imm, 0, 15);
      /* write new PC; callback function gets VU running */
      sim_write(NULL,
                (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
                (void*) & vu_pc,
                sizeof(unsigned_4));

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has one more word for the STMASK operand */
  unsigned_4* mask;

  mask = pke_pc_operand(me, 1);
  if(mask != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* fill the register */
      PKE_REG_MASK_SET(me, MASK, MASK, *mask);

      /* clear NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done: advance over the PKEcode word and its one operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 2);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has four more words for the STROW operand */
  unsigned_4* last_op;

  last_op = pke_pc_operand(me, 4);
  if(last_op != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* copy ROW registers: must all exist if the 4th operand exists */
      me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
      me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
      me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
      me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);

      /* clear NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 5);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has four more words for the STCOL operand */
  unsigned_4* last_op;

  last_op = pke_pc_operand(me, 4);
  if(last_op != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* copy COL registers: must all exist if the 4th operand exists */
      me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
      me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
      me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
      me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);

      /* clear NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 5);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
{
  unsigned_4* last_mpg_word;
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* map zero to max+1 */
  if(num==0) num=0x100;
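
  /* Illustrative: num == 0 therefore encodes 0x100, i.e. 256 doubleword
     (64-bit) VU instructions, which is num*2 == 512 32-bit FIFO words. */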

  /* check that FIFO has enough words for the MPG operand */
  last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
  if(last_mpg_word != NULL)
    {
      /* perform implied FLUSHE */
      if(! pke_check_stall(me, chk_vu))
        {
          /* VU idle */
          int i;

          /* "transferring" operand */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

          /* transfer VU instructions, one word per iteration */
          for(i=0; i<num*2; i++)
            {
              address_word vu_addr_base, vu_addr;
              address_word vutrack_addr_base, vutrack_addr;
              unsigned_4* operand;
              struct fifo_quadword* fq = pke_pc_fifo(me, i + 1, & operand);

              /* set NUM */
              PKE_REG_MASK_SET(me, NUM, NUM, (num*2 - i) / 2);

              /* imm: in 64-bit units for MPG instruction */
              /* VU*_MEM0 : instruction memory */
              vu_addr_base = (me->pke_number == 0) ?
                VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START;
              vu_addr = vu_addr_base + (imm*2) + i;

              /* VU*_MEM0_TRACK : source-addr tracking table */
              vutrack_addr_base = (me->pke_number == 0) ?
                VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
              vutrack_addr = vutrack_addr_base + (imm*2) + i;

              /* write data into VU memory */
              pke_track_write(me, operand, sizeof(unsigned_4),
                              vu_addr, fq->source_address);

              /* write srcaddr into VU srcaddr tracking table */
              sim_write(NULL,
                        (SIM_ADDR) vutrack_addr,
                        (void*) & fq->source_address,
                        sizeof(unsigned_4));
            } /* VU xfer loop */

          /* check NUM */
          ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0);

          /* done */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
          pke_pc_advance(me, 1 + num*2);
        }
      else
        {
          /* VU busy */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
          /* retry this instruction next clock */
        }
    } /* if FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has enough words for the DIRECT operand */
  unsigned_4* last_direct_word;
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* map zero to max+1 */
  if(imm==0) imm=0x10000;

  last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit quadwords */
  if(last_direct_word != NULL)
    {
      int i;
      quadword fifo_data;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* transfer GPUIF quadwords, one word per iteration */
      for(i=0; i<imm*4; i++)
        {
          unsigned_4* operand;
          struct fifo_quadword* fq = pke_pc_fifo(me, i + 1, &operand);

          /* collect word into quadword */
          fifo_data[i%4] = *operand;

          /* write to GPUIF FIFO only with a full quadword */
          if(i%4 == 3)
            {
              address_word gpuif_fifo = GIF_PATH2_FIFO_ADDR+(i/4);
              pke_track_write(me, fifo_data, sizeof(quadword),
                              (SIM_ADDR) gpuif_fifo, fq->source_address);
            } /* write collected quadword */

        } /* GPUIF xfer loop */

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + imm*4);
    } /* if FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
{
  /* treat the same as DIRECT */
  pke_code_direct(me, pkecode);
}


void
pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  short vn = BIT_MASK_GET(cmd, 2, 3);   /* unpack shape controls */
  short vl = BIT_MASK_GET(cmd, 0, 1);
  int m = BIT_MASK_GET(cmd, 4, 4);
  short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */
  short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
  int r = BIT_MASK_GET(imm, 15, 15);    /* indicator bits in imm value */
  int sx = BIT_MASK_GET(imm, 14, 14);

  int n, num_operands;
  unsigned_4* last_operand_word;

  /* map zero to max+1 */
  if(num==0) num=0x100;

  /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
  if(wl <= cl)
    n = num;
  else
    n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
  /* count FIFO operand words, rounding the packed bit total up to whole
     32-bit words; a V4_5 vector packs to 16 bits */
  if(vl < 3)
    num_operands = ((32 >> vl) * (vn+1) * n + 31) / 32;
  else
    num_operands = (16 * n + 31) / 32;
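
  /* Worked example (illustrative): UNPACK V4_16 (vl = 1, vn = 3) with
     num = 8 and cl >= wl gives n = 8; 16 bits * 4 units * 8 vectors =
     512 bits, so num_operands = 16 FIFO words. */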

  /* confirm that the FIFO has enough words in it */
  last_operand_word = pke_pc_operand(me, num_operands);
  if(last_operand_word != NULL)
    {
      address_word vu_addr_base;
      int vector_num;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* don't check whether the VU is idle */

      /* compute VU address base */
      if(me->pke_number == 0)
        vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
      else
        {
          vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
          if(r) vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);
        }

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, num);

      /* transfer the given number of vectors */
      vector_num = 0;  /* output vector number being processed */
      do
        {
          quadword vu_old_data;
          quadword vu_new_data;
          quadword unpacked_data;
          address_word vu_addr;
          unsigned_4 source_addr = 0;
          int i;

          /* decrement NUM */
          PKE_REG_MASK_SET(me, NUM, NUM,
                           PKE_REG_MASK_GET(me, NUM, NUM) - 1);

          /* compute VU destination address, as bytes in R5900 memory */
          if(cl >= wl)
            {
              /* map zero to max+1 */
              if(wl == 0) wl = 0x0100;
              vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
            }
          else
            vu_addr = vu_addr_base + 16*vector_num;

          /* XXX: can vu_addr overflow? */

          /* read old VU data word at address */
          sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));

          /* For cyclic unpack, the next operand quadword may come from the
             instruction stream or be zero. */
          if((cl < wl) && ((vector_num % wl) >= cl)) /* cl < wl implies wl > 0 */
            {
              /* clear operand - used only in an "indeterminate" state */
              for(i = 0; i < 4; i++)
                unpacked_data[i] = 0;
            }
          else
            {
              /* compute packed vector dimensions */
              int vectorbits, unitbits;

              if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
                {
                  unitbits = (32 >> vl);
                  vectorbits = unitbits * (vn+1);
                }
              else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
                {
                  unitbits = 5;
                  vectorbits = 16;
                }
              else /* illegal unpack variant */
                {
                  /* treat as illegal instruction */
                  pke_code_error(me, pkecode);
                  return;
                }
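
              /* Illustrative V4_5 layout: one 16-bit vector holds four
                 units at bit offsets 0, 5, 10 and 15 of the packed stream
                 -- three 5-bit fields and one 1-bit field, matching the
                 unitbits adjustment below. */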

              /* loop over columns */
              for(i=0; i<=vn; i++)
                {
                  unsigned_4 operand;

                  /* offset in bits in current operand word */
                  int bitoffset =
                    (vector_num * vectorbits) + (i * unitbits); /* # of bits from PKEcode */

                  /* last unit of V4_5 is only one bit wide */
                  if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
                    unitbits = 1;

                  /* fetch bitfield operand */
                  operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr);

                  /* selectively sign-extend; not for the 1-bit V4_5 value */
                  if(sx && unitbits > 1)
                    unpacked_data[i] = SEXT32(operand, unitbits-1);
                  else
                    unpacked_data[i] = operand;
                }
            } /* unpack word from instruction operand */

          /* compute replacement word */
          if(m) /* use mask register? */
            {
              /* compute index into mask register for this word */
              int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0 here */

              for(i=0; i<4; i++) /* loop over columns */
                {
                  int mask_op = PKE_MASKREG_GET(me, mask_index, i);
                  unsigned_4* masked_value = NULL;
                  unsigned_4 zero = 0;

                  switch(mask_op)
                    {
                    case PKE_MASKREG_INPUT:
                      /* for vn == 0, all columns are copied from column 0 */
                      if(vn == 0)
                        masked_value = & unpacked_data[0];
                      else if(i > vn)
                        masked_value = & zero; /* arbitrary data: undefined in spec */
                      else
                        masked_value = & unpacked_data[i];
                      break;

                    case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
                      masked_value = & me->regs[PKE_REG_R0 + i][0];
                      break;

                    case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
                      masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];
                      break;

                    case PKE_MASKREG_NOTHING:
                      /* "write inhibit" by re-copying old data */
                      masked_value = & vu_old_data[i];
                      break;

                    default:
                      ASSERT(0);
                      /* no other cases possible */
                    }

                  /* copy masked value for column */
                  vu_new_data[i] = *masked_value;
                } /* loop over columns */
            } /* mask */
          else
            {
              /* no mask - just copy over the entire unpacked quadword */
              memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
            }

          /* process STMOD register for accumulation operations */
          switch(PKE_REG_MASK_GET(me, MODE, MDE))
            {
            case PKE_MODE_ADDROW: /* add row registers to output data */
              for(i=0; i<4; i++)
                /* exploit R0..R3 contiguity */
                vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
              break;

            case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
              for(i=0; i<4; i++)
                {
                  /* exploit R0..R3 contiguity */
                  vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
                  me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
                }
              break;

            case PKE_MODE_INPUT: /* pass data through */
            default:
              ;
            }

          /* write replacement word */
          pke_track_write(me, vu_new_data, sizeof(vu_new_data),
                          (SIM_ADDR) vu_addr, source_addr);

          /* next vector please */
          vector_num ++;
        } /* vector transfer loop */
      while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);

      /* done: advance over the PKEcode word and all its operand words */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + num_operands);
    } /* PKE FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_error(struct pke_device* me, unsigned_4 pkecode)
{
  /* set ER1 flag in STAT register */
  PKE_REG_MASK_SET(me, STAT, ER1, 1);
  /* advance over the faulty word */
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
  pke_pc_advance(me, 1);
}