9655f8710860103a8aaddddc3ddf8d84e970676c
[deliverable/binutils-gdb.git] / sim / mips / sky-pke.c
1 /* Copyright (C) 1998, Cygnus Solutions */
2
3
/* Debugging PKE? */
5 #define PKE_DEBUG
6
7
8 #include <stdlib.h>
9 #include "sky-pke.h"
10 #include "sky-dma.h"
11 #include "sim-bits.h"
12 #include "sim-assert.h"
13 #include "sky-vu0.h"
14 #include "sky-vu1.h"
15 #include "sky-gpuif.h"
16
17
18 /* Imported functions */
19
20 void device_error (device *me, char* message); /* device.c */
21
22
23 /* Internal function declarations */
24
25 static int pke_io_read_buffer(device*, void*, int, address_word,
26 unsigned, sim_cpu*, sim_cia);
27 static int pke_io_write_buffer(device*, const void*, int, address_word,
28 unsigned, sim_cpu*, sim_cia);
29 static void pke_issue(SIM_DESC, struct pke_device*);
30 static void pke_pc_advance(struct pke_device*, int num_words);
31 static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
32 static unsigned_4 pke_pc_operand_bits(struct pke_device*, int bit_offset,
33 int bit_width, unsigned_4* sourceaddr);
34 static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
35 unsigned_4** operand);
36 static int pke_track_write(struct pke_device*, const void* src, int len,
37 address_word dest, unsigned_4 sourceaddr);
38 static void pke_attach(SIM_DESC sd, struct pke_device* me);
39 enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
40 static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
41 static void pke_flip_dbf(struct pke_device* me);
42 /* PKEcode handlers */
43 static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
44 static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
45 static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
46 static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
47 static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
48 static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
49 static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
50 static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
51 static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
52 static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
53 static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
54 static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
55 static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
56 static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
57 static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
58 static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
59 static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
60 static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
61 static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
62 static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
63 static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
64 static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);
65
66
67
68 /* Static data */
69
/* PKE0 (VIF0) device instance.  Initializer order must match the
   field order of `struct pke_device' in sky-pke.h.  */
struct pke_device pke0_device =
{
  { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device: name + I/O callbacks */
  0, 0,             /* ID (unit 0), flags */
  {},               /* regs: all registers cleared */
  {}, 0,            /* FIFO write buffer: staging quadword + done bitmask */
  NULL, 0, 0, NULL, /* FIFO: buffer, size, element count, trace file */
  0, 0              /* pc: FIFO index and word-within-quadword index */
};
79
80
/* PKE1 (VIF1) device instance.  Initializer order must match the
   field order of `struct pke_device' in sky-pke.h.  */
struct pke_device pke1_device =
{
  { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device: name + I/O callbacks */
  1, 0,             /* ID (unit 1), flags */
  {},               /* regs: all registers cleared */
  {}, 0,            /* FIFO write buffer: staging quadword + done bitmask */
  NULL, 0, 0, NULL, /* FIFO: buffer, size, element count, trace file */
  0, 0              /* pc: FIFO index and word-within-quadword index */
};
90
91
92
93 /* External functions */
94
95
96 /* Attach PKE addresses to main memory */
97
98 void
99 pke0_attach(SIM_DESC sd)
100 {
101 pke_attach(sd, & pke0_device);
102 }
103
104 void
105 pke1_attach(SIM_DESC sd)
106 {
107 pke_attach(sd, & pke1_device);
108 }
109
110
111
112 /* Issue a PKE instruction if possible */
113
114 void
115 pke0_issue(SIM_DESC sd)
116 {
117 pke_issue(sd, & pke0_device);
118 }
119
120 void
121 pke1_issue(SIM_DESC sd)
122 {
123 pke_issue(sd, & pke0_device);
124 }
125
126
127
128 /* Internal functions */
129
130
131 /* Attach PKE memory regions to simulator */
132
/* Attach one PKE unit's memory-mapped regions to the simulator core:
   the register window, the FIFO write port, and a one-word
   source-address tracking location.  Also opens an optional FIFO
   trace file named by the VIF<n>_TRACE_FILE environment variable
   (failure to open is non-fatal; tracing is simply disabled).  */
void
pke_attach(SIM_DESC sd, struct pke_device* me)
{
  /* register file: routed through this device's I/O callbacks */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
                   PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* FIFO port: a single quadword-wide write window */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
                   sizeof(quadword) /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* source-addr tracking word: plain zero-filled memory owned by the
     sim core (intentionally never freed), not routed through this
     device -- see pke_track_write() */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
                   sizeof(unsigned_4) /*nr_bytes*/,
                   0 /*modulo*/,
                   NULL,
                   zalloc(sizeof(unsigned_4)) /*buffer*/);

  /* attach to trace file if appropriate */
  {
    char trace_envvar[80]; /* ample for "VIF%d_TRACE_FILE" + single-digit unit number */
    char* trace_filename = NULL;
    sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number);
    trace_filename = getenv(trace_envvar);
    if(trace_filename != NULL)
      {
        me->fifo_trace_file = fopen(trace_filename, "w");
        if(me->fifo_trace_file == NULL)
          {
            perror("VIF FIFO trace error on fopen");
          }
      }
  }
}
176
177
178
179 /* Handle a PKE read; return no. of bytes read */
180
/* Handle a PKE read; return no. of bytes read (0 on error).

   Reads falling in the register window return the selected 32-bit
   register value, padded to a zeroed quadword; reads of the FIFO
   port return zeroes (the FIFO is write-only).  An access that
   straddles a quadword boundary is rejected.  */
int
pke_io_read_buffer(device *me_,
                   void *dest,
                   int space,
                   address_word addr,
                   unsigned nr_bytes,
                   sim_cpu *cpu,
                   sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank: registers are laid out one per quadword */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);      /* find byte-offset inside register bank */
      int readable = 1;
      quadword result;

      /* clear result: only word 0 ever carries register data */
      result[0] = result[1] = result[2] = result[3] = 0;

      /* handle reads to individual registers; clear `readable' on error */
      switch(reg_num)
        {
        /* handle common case of register reading, side-effect free */
        /* PKE1-only registers: reading them on PKE0 is an error */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            readable = 0;
          /* fall through */
        /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
        case PKE_REG_ERR:
        case PKE_REG_MARK:
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          result[0] = me->regs[reg_num][0];
          break;

        /* handle common case of write-only registers */
        case PKE_REG_FBRST:
          readable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform transfer & return */
      if(readable)
        {
          /* copy the requested slice of the (zero-padded) quadword */
          memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
          /* okay */
          return nr_bytes;
        }
      else
        {
          /* error */
          return 0;
        }

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */

      /* FIFO is not readable: return a word of zeroes */
      memset(dest, 0, nr_bytes);
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}
290
291
/* Handle a PKE write; return no. of bytes written */
293
294 int
295 pke_io_write_buffer(device *me_,
296 const void *src,
297 int space,
298 address_word addr,
299 unsigned nr_bytes,
300 sim_cpu *cpu,
301 sim_cia cia)
302 {
303 /* downcast to gather embedding pke_device struct */
304 struct pke_device* me = (struct pke_device*) me_;
305
306 /* find my address ranges */
307 address_word my_reg_start =
308 (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
309 address_word my_fifo_addr =
310 (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
311
312 /* enforce that an access does not span more than one quadword */
313 address_word low = ADDR_TRUNC_QW(addr);
314 address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
315 if(low != high)
316 return 0;
317
318 /* classify address & handle */
319 if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
320 {
321 /* register bank */
322 int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
323 int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
324 int writeable = 1;
325 quadword input;
326
327 /* clear input */
328 input[0] = input[1] = input[2] = input[3] = 0;
329
330 /* write user-given bytes into input */
331 memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);
332
333 /* handle writes to individual registers; clear `writeable' on error */
334 switch(reg_num)
335 {
336 case PKE_REG_FBRST:
337 /* Order these tests from least to most overriding, in case
338 multiple bits are set. */
339 if(BIT_MASK_GET(input[0], 2, 2)) /* STC bit */
340 {
341 /* clear a bunch of status bits */
342 PKE_REG_MASK_SET(me, STAT, PSS, 0);
343 PKE_REG_MASK_SET(me, STAT, PFS, 0);
344 PKE_REG_MASK_SET(me, STAT, PIS, 0);
345 PKE_REG_MASK_SET(me, STAT, INT, 0);
346 PKE_REG_MASK_SET(me, STAT, ER0, 0);
347 PKE_REG_MASK_SET(me, STAT, ER1, 0);
348 me->flags &= ~PKE_FLAG_PENDING_PSS;
349 /* will allow resumption of possible stalled instruction */
350 }
351 if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */
352 {
353 me->flags |= PKE_FLAG_PENDING_PSS;
354 }
355 if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */
356 {
357 PKE_REG_MASK_SET(me, STAT, PFS, 1);
358 }
359 if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */
360 {
361 /* clear FIFO by skipping to word after PC: also
362 prevents re-execution attempt of possible stalled
363 instruction */
364 me->fifo_num_elements = me->fifo_pc;
365 /* clear registers, flag, other state */
366 memset(me->regs, 0, sizeof(me->regs));
367 me->fifo_qw_done = 0;
368 me->flags = 0;
369 me->qw_pc = 0;
370 }
371 break;
372
373 case PKE_REG_ERR:
374 /* copy bottom three bits */
375 BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
376 break;
377
378 case PKE_REG_MARK:
379 /* copy bottom sixteen bits */
380 PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
381 /* reset MRK bit in STAT */
382 PKE_REG_MASK_SET(me, STAT, MRK, 0);
383 break;
384
385 /* handle common case of read-only registers */
386 /* PKE1-only registers - not really necessary to handle separately */
387 case PKE_REG_BASE:
388 case PKE_REG_OFST:
389 case PKE_REG_TOPS:
390 case PKE_REG_TOP:
391 case PKE_REG_DBF:
392 if(me->pke_number == 0)
393 writeable = 0;
394 /* fall through */
395 /* PKE0 & PKE1 common registers*/
396 case PKE_REG_STAT:
397 /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
398 case PKE_REG_CYCLE:
399 case PKE_REG_MODE:
400 case PKE_REG_NUM:
401 case PKE_REG_MASK:
402 case PKE_REG_CODE:
403 case PKE_REG_ITOPS:
404 case PKE_REG_ITOP:
405 case PKE_REG_R0:
406 case PKE_REG_R1:
407 case PKE_REG_R2:
408 case PKE_REG_R3:
409 case PKE_REG_C0:
410 case PKE_REG_C1:
411 case PKE_REG_C2:
412 case PKE_REG_C3:
413 writeable = 0;
414 break;
415
416 default:
417 ASSERT(0); /* test above should prevent this possibility */
418 }
419
420 /* perform return */
421 if(writeable)
422 {
423 /* okay */
424 return nr_bytes;
425 }
426 else
427 {
428 /* error */
429 return 0;
430 }
431
432 /* NOTREACHED */
433 }
434 else if(addr >= my_fifo_addr &&
435 addr < my_fifo_addr + sizeof(quadword))
436 {
437 /* FIFO */
438 struct fifo_quadword* fqw;
439 int fifo_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside fifo quadword */
440 unsigned_4 dma_tag_present = 0;
441 int i;
442
443 /* collect potentially-partial quadword in write buffer */
444 memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
445 /* mark bytes written */
446 for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
447 BIT_MASK_SET(me->fifo_qw_done, i, i, 1);
448
449 /* return if quadword not quite written yet */
450 if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) !=
451 BIT_MASK_BTW(0, sizeof(quadword)-1))
452 return nr_bytes;
453
454 /* all done - process quadword after clearing flag */
455 BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0);
456
457 /* ensure FIFO has enough elements */
458 if(me->fifo_num_elements == me->fifo_buffer_size)
459 {
460 /* time to grow */
461 int new_fifo_buffer_size = me->fifo_buffer_size + 20;
462 void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(quadword));
463
464 if(ptr == NULL)
465 {
466 /* oops, cannot enlarge FIFO any more */
467 device_error(me_, "Cannot enlarge FIFO buffer\n");
468 return 0;
469 }
470
471 me->fifo = ptr;
472 me->fifo_buffer_size = new_fifo_buffer_size;
473 }
474
475 /* add new quadword at end of FIFO */
476 fqw = & me->fifo[me->fifo_num_elements];
477 fqw->word_class[0] = fqw->word_class[1] =
478 fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
479 memcpy((void*) fqw->data, me->fifo_qw_in_progress, sizeof(quadword));
480 ASSERT(sizeof(unsigned_4) == 4);
481 PKE_MEM_READ((SIM_ADDR) (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
482 & fqw->source_address,
483 4);
484 PKE_MEM_READ((SIM_ADDR) (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
485 & dma_tag_present,
486 4);
487
488 if(dma_tag_present)
489 {
490 /* lower two words are DMA tags */
491 fqw->word_class[0] = fqw->word_class[1] = wc_dma;
492 }
493
494
495 me->fifo_num_elements++;
496
497 /* set FQC to "1" as FIFO is now not empty */
498 PKE_REG_MASK_SET(me, STAT, FQC, 1);
499
500 /* okay */
501 return nr_bytes;
502 }
503
504 /* NOTREACHED */
505 return 0;
506 }
507
508
509
510 /* Issue & swallow next PKE opcode if possible/available */
511
/* Issue & swallow the next PKE opcode if possible/available.

   Pipeline per call: (1) check stall/halt conditions and FIFO
   availability, (2) fetch the next instruction word (skipping DMA
   tags), (3) decode the opcode fields and dispatch to the matching
   pke_code_* handler, which consumes operands and updates PC/STAT. */
void
pke_issue(SIM_DESC sd, struct pke_device* me)
{
  struct fifo_quadword* fqw;
  unsigned_4 fw;
  unsigned_4 cmd, intr, num;
  unsigned_4 imm;

  /* 1 -- test go / no-go for PKE execution */

  /* switch on STAT:PSS if PSS-pending and in idle state; a pending
     STP request (from an FBRST write) only takes effect between
     instructions */
  if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) &&
     (me->flags & PKE_FLAG_PENDING_PSS) != 0)
    {
      me->flags &= ~PKE_FLAG_PENDING_PSS;
      PKE_REG_MASK_SET(me, STAT, PSS, 1);
    }

  /* check for stall/halt control bits */
  if(PKE_REG_MASK_GET(me, STAT, PFS) ||
     PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */
     /* PEW bit not a reason to keep stalling - it's re-checked below */
     /* PGW bit not a reason to keep stalling - it's re-checked below */
     /* maskable stall controls: ER0, ER1, PIS -- each only stalls
        when its corresponding ERR mask bit is clear */
     (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
     (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
     (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
    {
      /* try again next cycle; no state change */
      return;
    }

  /* confirm availability of new quadword of PKE instructions */
  if(me->fifo_num_elements <= me->fifo_pc)
    return;


  /* 2 -- fetch PKE instruction */

  /* skip over DMA tag, if present (advance-by-zero still hops tags) */
  pke_pc_advance(me, 0);

  /* "fetch" instruction quadword and word */
  fqw = & me->fifo[me->fifo_pc];
  fw = fqw->data[me->qw_pc];

  /* store word in PKECODE register */
  me->regs[PKE_REG_CODE][0] = fw;


  /* 3 -- decode PKE instruction */

  /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
     so op-code is in top byte. */
  intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
  cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  /* num/imm are extracted for completeness; the individual handlers
     re-extract the fields they need from the raw word */
  num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  if(intr)
    {
      /* set INT flag in STAT register */
      PKE_REG_MASK_SET(me, STAT, INT, 1);
      /* XXX: send interrupt to 5900? */
    }

  /* decoding */
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);

  /* decode & execute; PKE1-only opcodes are gated on pke_number and
     otherwise fall through to pke_code_error() */
  if(IS_PKE_CMD(cmd, PKENOP))
    pke_code_nop(me, fw);
  else if(IS_PKE_CMD(cmd, STCYCL))
    pke_code_stcycl(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
    pke_code_offset(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
    pke_code_base(me, fw);
  else if(IS_PKE_CMD(cmd, ITOP))
    pke_code_itop(me, fw);
  else if(IS_PKE_CMD(cmd, STMOD))
    pke_code_stmod(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
    pke_code_mskpath3(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMARK))
    pke_code_pkemark(me, fw);
  else if(IS_PKE_CMD(cmd, FLUSHE))
    pke_code_flushe(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
    pke_code_flush(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
    pke_code_flusha(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCAL))
    pke_code_pkemscal(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCNT))
    pke_code_pkemscnt(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
    pke_code_pkemscalf(me, fw);
  else if(IS_PKE_CMD(cmd, STMASK))
    pke_code_stmask(me, fw);
  else if(IS_PKE_CMD(cmd, STROW))
    pke_code_strow(me, fw);
  else if(IS_PKE_CMD(cmd, STCOL))
    pke_code_stcol(me, fw);
  else if(IS_PKE_CMD(cmd, MPG))
    pke_code_mpg(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECT))
    pke_code_direct(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECTHL))
    pke_code_directhl(me, fw);
  else if(IS_PKE_CMD(cmd, UNPACK))
    pke_code_unpack(me, fw);
  /* ... no other commands ... */
  else
    pke_code_error(me, fw);
}
628
629
630
631 /* advance the PC by given number of data words; update STAT/FQC
632 field; assume FIFO is filled enough; classify passed-over words;
633 write FIFO trace line */
634
635 void
636 pke_pc_advance(struct pke_device* me, int num_words)
637 {
638 int num = num_words;
639 struct fifo_quadword* fq = NULL;
640 int skipped = 0;
641 ASSERT(num_words >= 0);
642
643 do
644 {
645 fq = & me->fifo[me->fifo_pc];
646
647 /* skip over DMA tag words if present in word 0 or 1 */
648 if(fq->word_class[me->qw_pc] == wc_dma)
649 {
650 /* skip by going around loop an extra time */
651 num ++;
652 skipped = 1;
653 }
654 else
655 skipped = 0;
656
657 if(num > 0) /* increment PC */
658 {
659 /* one word skipped */
660 num --;
661
662 /* point to next word */
663 me->qw_pc ++;
664 if(me->qw_pc == 4)
665 {
666 me->qw_pc = 0;
667 me->fifo_pc ++;
668
669 /* trace the consumption of this FIFO quadword */
670 if(me->fifo_trace_file != NULL)
671 {
672 /* assert complete classification */
673 ASSERT(fq->word_class[3] != wc_unknown);
674 ASSERT(fq->word_class[2] != wc_unknown);
675 ASSERT(fq->word_class[1] != wc_unknown);
676 ASSERT(fq->word_class[0] != wc_unknown);
677
678 /* print trace record */
679 fprintf(me->fifo_trace_file,
680 "%d 0x%ux_%ux_%ux_%ux 0x%ux %c%c%c%c\n",
681 (me->pke_number == 0 ? 0 : 1),
682 (unsigned) fq->data[3], (unsigned) fq->data[2],
683 (unsigned) fq->data[1], (unsigned) fq->data[0],
684 (unsigned) fq->source_address,
685 fq->word_class[3], fq->word_class[2],
686 fq->word_class[1], fq->word_class[0]);
687 }
688
689 /* XXX: zap old entries in FIFO */
690
691 } /* next quadword */
692 } /* increment PC */
693 } /* eat num words */
694 while(num > 0 || skipped);
695
696 /* clear FQC if FIFO is now empty */
697 if(me->fifo_num_elements == me->fifo_pc)
698 {
699 PKE_REG_MASK_SET(me, STAT, FQC, 0);
700 }
701 else /* annote the word where the PC lands as an PKEcode */
702 {
703 ASSERT(fq->word_class[me->qw_pc] == wc_pkecode ||
704 fq->word_class[me->qw_pc] == wc_unknown);
705 fq->word_class[me->qw_pc] = wc_pkecode;
706 }
707 }
708
709
710
711 /* Return pointer to FIFO quadword containing given operand# in FIFO.
712 `operand_num' starts at 1. Return pointer to operand word in last
713 argument, if non-NULL. If FIFO is not full enough, return 0.
714 Signal an ER0 indication upon skipping a DMA tag. */
715
/* Return pointer to the FIFO quadword containing given operand# in
   the FIFO.  `operand_num' starts at 1 (operand 1 is the word after
   the instruction word).  Return pointer to the operand word itself
   in the last argument, if non-NULL.  If the FIFO is not full
   enough, return NULL and leave *operand untouched.  Signal an ER0
   indication upon skipping a DMA tag.  Does NOT move the PC. */
struct fifo_quadword*
pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
{
  int num = operand_num;
  int new_qw_pc, new_fifo_pc;
  struct fifo_quadword* fq = NULL;

  ASSERT(num > 0);

  /* snapshot current pointers; the walk below is purely speculative */
  new_fifo_pc = me->fifo_pc;
  new_qw_pc = me->qw_pc;

  do
    {
      /* one word skipped */
      num --;

      /* point to next word, wrapping into the next quadword */
      new_qw_pc ++;
      if(new_qw_pc == 4)
        {
          new_qw_pc = 0;
          new_fifo_pc ++;
        }

      /* check for FIFO underflow */
      if(me->fifo_num_elements == new_fifo_pc)
        {
          fq = NULL;
          break;
        }

      /* skip over DMA tag words if present in word 0 or 1 */
      fq = & me->fifo[new_fifo_pc];
      if(fq->word_class[new_qw_pc] == wc_dma)
        {
          /* mismatch error! -- operands should not contain DMA tags */
          PKE_REG_MASK_SET(me, STAT, ER0, 1);
          /* skip by going around loop an extra time */
          num ++;
        }
    }
  while(num > 0);

  /* return pointer to operand word itself */
  if(fq != NULL)
    {
      *operand = & fq->data[new_qw_pc];

      /* annotate the word where the pseudo-PC lands as a PKE operand */
      ASSERT(fq->word_class[new_qw_pc] == wc_pkedata ||
             fq->word_class[new_qw_pc] == wc_unknown);
      fq->word_class[new_qw_pc] = wc_pkedata;
    }

  return fq;
}
774
775
776 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
777 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
778 them as an error (ER0). */
779
780 unsigned_4*
781 pke_pc_operand(struct pke_device* me, int operand_num)
782 {
783 unsigned_4* operand = NULL;
784 struct fifo_quadword* fifo_operand;
785
786 fifo_operand = pke_pc_fifo(me, operand_num, & operand);
787
788 if(fifo_operand == NULL)
789 ASSERT(operand == NULL); /* pke_pc_fifo() ought leave it untouched */
790
791 return operand;
792 }
793
794
795 /* Return a bit-field extract of given operand# in FIFO, and its
796 source-addr. `bit_offset' starts at 0, referring to LSB after PKE
797 instruction word. Width must be >0, <=32. Assume FIFO is full
798 enough. Skip over DMA tags, but mark them as an error (ER0). */
799
800 unsigned_4
801 pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr)
802 {
803 unsigned_4* word = NULL;
804 unsigned_4 value;
805 struct fifo_quadword* fifo_operand;
806
807 /* find operand word with bitfield */
808 fifo_operand = pke_pc_fifo(me, (bit_offset / 32) + 1, &word);
809 ASSERT(word != 0);
810
811 /* extract bitfield from word */
812 value = BIT_MASK_GET(*word, bit_offset % 32, bit_width);
813
814 /* extract source addr from fifo word */
815 *source_addr = fifo_operand->source_address;
816
817 return value;
818 }
819
820
821
822
823
824 /* Write a bunch of bytes into simulator memory. Store the given source address into the
825 PKE sourceaddr tracking word. */
826 int
827 pke_track_write(struct pke_device* me, const void* src, int len,
828 address_word dest, unsigned_4 sourceaddr)
829 {
830 int rc;
831 unsigned_4 no_sourceaddr = 0;
832
833 /* write srcaddr into PKE srcaddr tracking */
834 sim_write(NULL,
835 (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
836 (void*) & sourceaddr,
837 sizeof(unsigned_4));
838
839 /* write bytes into simulator */
840 rc = sim_write(NULL,
841 (SIM_ADDR) dest,
842 (void*) src,
843 len);
844
845 /* clear srcaddr from PKE srcaddr tracking */
846 sim_write(NULL,
847 (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
848 (void*) & no_sourceaddr,
849 sizeof(unsigned_4));
850
851 return rc;
852 }
853
854
855 /* check for stall conditions on indicated devices (path* only on PKE1), do not change status
856 return 0 iff no stall */
857 int
858 pke_check_stall(struct pke_device* me, enum pke_check_target what)
859 {
860 int any_stall = 0;
861 unsigned_4 cop2_stat, gpuif_stat;
862
863 /* read status words */
864 sim_read(NULL,
865 (SIM_ADDR) (GIF_REG_STAT),
866 (void*) & gpuif_stat,
867 sizeof(unsigned_4));
868
869 sim_read(NULL,
870 (SIM_ADDR) (COP2_REG_STAT_ADDR),
871 (void*) & cop2_stat,
872 sizeof(unsigned_4));
873
874 /* perform checks */
875 if(what == chk_vu)
876 {
877 if(me->pke_number == 0)
878 any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E);
879 else /* if(me->pke_number == 1) */
880 any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E);
881 }
882 else if(what == chk_path1) /* VU -> GPUIF */
883 {
884 if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1)
885 any_stall = 1;
886 }
887 else if(what == chk_path2) /* PKE -> GPUIF */
888 {
889 if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2)
890 any_stall = 1;
891 }
892 else if(what == chk_path3) /* DMA -> GPUIF */
893 {
894 if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3)
895 any_stall = 1;
896 }
897 else
898 {
899 /* invalid what */
900 ASSERT(0);
901 }
902
903 /* any stall reasons? */
904 return any_stall;
905 }
906
907
908 /* flip the DBF bit; recompute TOPS, ITOP & TOP */
/* Flip the double-buffer flag (DBF) and recompute the registers that
   depend on it: TOPS, ITOP and TOP.  Invoked on PKE1 when kicking
   the VU (MSCAL/MSCNT/MSCALF).  */
void
pke_flip_dbf(struct pke_device* me)
{
  /* flip DBF */
  PKE_REG_MASK_SET(me, DBF, DF,
                   PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
  /* mirror the new DF value into STAT's DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
  /* compute new TOPS: BASE, plus OFFSET when DF is set */
  PKE_REG_MASK_SET(me, TOPS, TOPS,
                   (PKE_REG_MASK_GET(me, BASE, BASE) +
                    (PKE_REG_MASK_GET(me, DBF, DF) *
                     PKE_REG_MASK_GET(me, OFST, OFFSET))));
  /* latch the pending ITOPS/TOPS values into ITOP and TOP */
  PKE_REG_MASK_SET(me, ITOP, ITOP,
                   PKE_REG_MASK_GET(me, ITOPS, ITOPS));
  PKE_REG_MASK_SET(me, TOP, TOP,
                   PKE_REG_MASK_GET(me, TOPS, TOPS));
}
927
928
929
930 /* PKEcode handler functions -- responsible for checking and
931 confirming old stall conditions, executing pkecode, updating PC and
932 status registers -- may assume being run on correct PKE unit */
933
/* PKEcode NOP: no operation -- consume the instruction word and
   return the unit to the idle state.  */
void
pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
{
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}
941
942
943 void
944 pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
945 {
946 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
947 /* copy immediate value into CYCLE reg */
948 me->regs[PKE_REG_CYCLE][0] = imm;
949 /* done */
950 pke_pc_advance(me, 1);
951 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
952 }
953
954
955 void
956 pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
957 {
958 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
959 /* copy 10 bits to OFFSET field */
960 PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
961 /* clear DBF bit */
962 PKE_REG_MASK_SET(me, DBF, DF, 0);
963 /* clear other DBF bit */
964 PKE_REG_MASK_SET(me, STAT, DBF, 0);
965 /* set TOPS = BASE */
966 PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
967 /* done */
968 pke_pc_advance(me, 1);
969 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
970 }
971
972
973 void
974 pke_code_base(struct pke_device* me, unsigned_4 pkecode)
975 {
976 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
977 /* copy 10 bits to BASE field */
978 PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
979 /* clear DBF bit */
980 PKE_REG_MASK_SET(me, DBF, DF, 0);
981 /* clear other DBF bit */
982 PKE_REG_MASK_SET(me, STAT, DBF, 0);
983 /* set TOPS = BASE */
984 PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
985 /* done */
986 pke_pc_advance(me, 1);
987 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
988 }
989
990
991 void
992 pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
993 {
994 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
995 /* copy 10 bits to ITOPS field */
996 PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
997 /* done */
998 pke_pc_advance(me, 1);
999 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1000 }
1001
1002
/* PKEcode STMOD: set the addition-decompression mode in MODE:MDE.  */
void
pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy mode bits to MODE register.  NOTE(review): the original
     comment said "2 bits", but bits 0..2 (three bits) are extracted
     here; PKE_REG_MASK_SET re-masks the value to the MDE field width
     regardless -- confirm the intended field width against the MODE
     register definition in sky-pke.h.  */
  PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}
1013
1014
/* PKEcode MSKPATH3 (PKE1 only): mask/unmask GIF path 3.
   Unimplemented: always asserts if reached.  */
void
pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
{
  ASSERT(0);
  /* XXX: no easy interface toward GPUIF for this purpose */
}
1021
1022
1023 void
1024 pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
1025 {
1026 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1027 /* copy 16 bits to MARK register */
1028 PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
1029 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1030 PKE_REG_MASK_SET(me, STAT, MRK, 1);
1031 /* done */
1032 pke_pc_advance(me, 1);
1033 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1034 }
1035
1036
1037 void
1038 pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
1039 {
1040 /* compute next PEW bit */
1041 if(pke_check_stall(me, chk_vu))
1042 {
1043 /* VU busy */
1044 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1045 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1046 /* try again next cycle */
1047 }
1048 else
1049 {
1050 /* VU idle */
1051 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1052 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1053 pke_pc_advance(me, 1);
1054 }
1055 }
1056
1057
1058 void
1059 pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
1060 {
1061 int something_busy = 0;
1062
1063 /* compute next PEW, PGW bits */
1064 if(pke_check_stall(me, chk_vu))
1065 {
1066 something_busy = 1;
1067 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1068 }
1069 else
1070 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1071
1072
1073 if(pke_check_stall(me, chk_path1) ||
1074 pke_check_stall(me, chk_path2))
1075 {
1076 something_busy = 1;
1077 PKE_REG_MASK_SET(me, STAT, PGW, 1);
1078 }
1079 else
1080 PKE_REG_MASK_SET(me, STAT, PGW, 0);
1081
1082 /* go or no go */
1083 if(something_busy)
1084 {
1085 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1086 /* try again next cycle */
1087 }
1088 else
1089 {
1090 /* all idle */
1091 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1092 pke_pc_advance(me, 1);
1093 }
1094 }
1095
1096
1097 void
1098 pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
1099 {
1100 int something_busy = 0;
1101
1102 /* compute next PEW, PGW bits */
1103 if(pke_check_stall(me, chk_vu))
1104 {
1105 something_busy = 1;
1106 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1107 }
1108 else
1109 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1110
1111
1112 if(pke_check_stall(me, chk_path1) ||
1113 pke_check_stall(me, chk_path2) ||
1114 pke_check_stall(me, chk_path3))
1115 {
1116 something_busy = 1;
1117 PKE_REG_MASK_SET(me, STAT, PGW, 1);
1118 }
1119 else
1120 PKE_REG_MASK_SET(me, STAT, PGW, 0);
1121
1122 if(something_busy)
1123 {
1124 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1125 /* try again next cycle */
1126 }
1127 else
1128 {
1129 /* all idle */
1130 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1131 pke_pc_advance(me, 1);
1132 }
1133 }
1134
1135
1136 void
1137 pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
1138 {
1139 /* compute next PEW bit */
1140 if(pke_check_stall(me, chk_vu))
1141 {
1142 /* VU busy */
1143 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1144 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1145 /* try again next cycle */
1146 }
1147 else
1148 {
1149 unsigned_4 vu_pc;
1150 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1151
1152 /* VU idle */
1153 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1154
1155 /* flip DBF on PKE1 */
1156 if(me->pke_number == 1)
1157 pke_flip_dbf(me);
1158
1159 /* compute new PC for VU */
1160 vu_pc = BIT_MASK_GET(imm, 0, 15);
1161 /* write new PC; callback function gets VU running */
1162 sim_write(NULL,
1163 (SIM_ADDR) (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1164 (void*) & vu_pc,
1165 sizeof(unsigned_4));
1166
1167 /* done */
1168 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1169 pke_pc_advance(me, 1);
1170 }
1171 }
1172
1173
1174
1175 void
1176 pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
1177 {
1178 /* compute next PEW bit */
1179 if(pke_check_stall(me, chk_vu))
1180 {
1181 /* VU busy */
1182 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1183 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1184 /* try again next cycle */
1185 }
1186 else
1187 {
1188 unsigned_4 vu_pc;
1189
1190 /* VU idle */
1191 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1192
1193 /* flip DBF on PKE1 */
1194 if(me->pke_number == 1)
1195 pke_flip_dbf(me);
1196
1197 /* read old PC */
1198 sim_read(NULL,
1199 (SIM_ADDR) (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1200 (void*) & vu_pc,
1201 sizeof(unsigned_4));
1202
1203 /* rewrite new PC; callback function gets VU running */
1204 sim_write(NULL,
1205 (SIM_ADDR) (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1206 (void*) & vu_pc,
1207 sizeof(unsigned_4));
1208
1209 /* done */
1210 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1211 pke_pc_advance(me, 1);
1212 }
1213 }
1214
1215
1216 void
1217 pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
1218 {
1219 int something_busy = 0;
1220
1221 /* compute next PEW, PGW bits */
1222 if(pke_check_stall(me, chk_vu))
1223 {
1224 something_busy = 1;
1225 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1226 }
1227 else
1228 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1229
1230
1231 if(pke_check_stall(me, chk_path1) ||
1232 pke_check_stall(me, chk_path2) ||
1233 pke_check_stall(me, chk_path3))
1234 {
1235 something_busy = 1;
1236 PKE_REG_MASK_SET(me, STAT, PGW, 1);
1237 }
1238 else
1239 PKE_REG_MASK_SET(me, STAT, PGW, 0);
1240
1241 /* go or no go */
1242 if(something_busy)
1243 {
1244 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1245 /* try again next cycle */
1246 }
1247 else
1248 {
1249 unsigned_4 vu_pc;
1250 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1251
1252 /* flip DBF on PKE1 */
1253 if(me->pke_number == 1)
1254 pke_flip_dbf(me);
1255
1256 /* compute new PC for VU */
1257 vu_pc = BIT_MASK_GET(imm, 0, 15);
1258 /* write new PC; callback function gets VU running */
1259 sim_write(NULL,
1260 (SIM_ADDR) (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1261 (void*) & vu_pc,
1262 sizeof(unsigned_4));
1263
1264 /* done */
1265 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1266 pke_pc_advance(me, 1);
1267 }
1268 }
1269
1270
1271 void
1272 pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
1273 {
1274 /* check that FIFO has one more word for STMASK operand */
1275 unsigned_4* mask;
1276
1277 mask = pke_pc_operand(me, 1);
1278 if(mask != NULL)
1279 {
1280 /* "transferring" operand */
1281 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1282
1283 /* set NUM */
1284 PKE_REG_MASK_SET(me, NUM, NUM, 1);
1285
1286 /* fill the register */
1287 PKE_REG_MASK_SET(me, MASK, MASK, *mask);
1288
1289 /* set NUM */
1290 PKE_REG_MASK_SET(me, NUM, NUM, 0);
1291
1292 /* done */
1293 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1294 pke_pc_advance(me, 2);
1295 }
1296 else
1297 {
1298 /* need to wait for another word */
1299 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1300 /* try again next cycle */
1301 }
1302 }
1303
1304
1305 void
1306 pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
1307 {
1308 /* check that FIFO has four more words for STROW operand */
1309 unsigned_4* last_op;
1310
1311 last_op = pke_pc_operand(me, 4);
1312 if(last_op != NULL)
1313 {
1314 /* "transferring" operand */
1315 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1316
1317 /* set NUM */
1318 PKE_REG_MASK_SET(me, NUM, NUM, 1);
1319
1320 /* copy ROW registers: must all exist if 4th operand exists */
1321 me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
1322 me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
1323 me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
1324 me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);
1325
1326 /* set NUM */
1327 PKE_REG_MASK_SET(me, NUM, NUM, 0);
1328
1329 /* done */
1330 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1331 pke_pc_advance(me, 5);
1332 }
1333 else
1334 {
1335 /* need to wait for another word */
1336 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1337 /* try again next cycle */
1338 }
1339 }
1340
1341
1342 void
1343 pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
1344 {
1345 /* check that FIFO has four more words for STCOL operand */
1346 unsigned_4* last_op;
1347
1348 last_op = pke_pc_operand(me, 4);
1349 if(last_op != NULL)
1350 {
1351 /* "transferring" operand */
1352 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1353
1354 /* set NUM */
1355 PKE_REG_MASK_SET(me, NUM, NUM, 1);
1356
1357 /* copy COL registers: must all exist if 4th operand exists */
1358 me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
1359 me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
1360 me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
1361 me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);
1362
1363 /* set NUM */
1364 PKE_REG_MASK_SET(me, NUM, NUM, 0);
1365
1366 /* done */
1367 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1368 pke_pc_advance(me, 5);
1369 }
1370 else
1371 {
1372 /* need to wait for another word */
1373 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1374 /* try again next cycle */
1375 }
1376 }
1377
1378
1379 void
1380 pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
1381 {
1382 unsigned_4* last_mpg_word;
1383 int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
1384 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1385
1386 /* map zero to max+1 */
1387 if(num==0) num=0x100;
1388
1389 /* check that FIFO has a few more words for MPG operand */
1390 last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
1391 if(last_mpg_word != NULL)
1392 {
1393 /* perform implied FLUSHE */
1394 if(pke_check_stall(me, chk_vu))
1395 {
1396 /* VU idle */
1397 int i;
1398
1399 /* "transferring" operand */
1400 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1401
1402 /* transfer VU instructions, one word per iteration */
1403 for(i=0; i<num*2; i++)
1404 {
1405 address_word vu_addr_base, vu_addr;
1406 address_word vutrack_addr_base, vutrack_addr;
1407 unsigned_4* operand;
1408 struct fifo_quadword* fq = pke_pc_fifo(me, num, & operand);
1409
1410 /* set NUM */
1411 PKE_REG_MASK_SET(me, NUM, NUM, (num*2 - i) / 2);
1412
1413 /* imm: in 64-bit units for MPG instruction */
1414 /* VU*_MEM0 : instruction memory */
1415 vu_addr_base = (me->pke_number == 0) ?
1416 VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START;
1417 vu_addr = vu_addr_base + (imm*2) + i;
1418
1419 /* VU*_MEM0_TRACK : source-addr tracking table */
1420 vutrack_addr_base = (me->pke_number == 0) ?
1421 VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
1422 vutrack_addr = vu_addr_base + (imm*2) + i;
1423
1424 /* write data into VU memory */
1425 pke_track_write(me, operand, sizeof(unsigned_4),
1426 vu_addr, fq->source_address);
1427
1428 /* write srcaddr into VU srcaddr tracking table */
1429 sim_write(NULL,
1430 (SIM_ADDR) vutrack_addr,
1431 (void*) & fq->source_address,
1432 sizeof(unsigned_4));
1433 } /* VU xfer loop */
1434
1435 /* check NUM */
1436 ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0);
1437
1438 /* done */
1439 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1440 pke_pc_advance(me, 1 + num*2);
1441 }
1442 else
1443 {
1444 /* VU busy */
1445 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1446 /* retry this instruction next clock */
1447 }
1448 } /* if FIFO full enough */
1449 else
1450 {
1451 /* need to wait for another word */
1452 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1453 /* retry this instruction next clock */
1454 }
1455 }
1456
1457
1458 void
1459 pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
1460 {
1461 /* check that FIFO has a few more words for DIRECT operand */
1462 unsigned_4* last_direct_word;
1463 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1464 int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
1465
1466 /* map zero to max+1 */
1467 if(imm==0) imm=0x10000;
1468
1469 last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */
1470 if(last_direct_word != NULL)
1471 {
1472 /* VU idle */
1473 int i;
1474 quadword fifo_data;
1475
1476 /* "transferring" operand */
1477 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1478
1479 /* transfer GPUIF quadwords, one word per iteration */
1480 for(i=0; i<imm*4; i++)
1481 {
1482 unsigned_4* operand;
1483 struct fifo_quadword* fq = pke_pc_fifo(me, num, &operand);
1484
1485 /* collect word into quadword */
1486 fifo_data[i%4] = *operand;
1487
1488 /* write to GPUIF FIFO only with full word */
1489 if(i%4 == 3)
1490 {
1491 address_word gpuif_fifo = GIF_PATH2_FIFO_ADDR+(i/4);
1492 pke_track_write(me, fifo_data, sizeof(quadword),
1493 (SIM_ADDR) gpuif_fifo, fq->source_address);
1494 } /* write collected quadword */
1495
1496 } /* GPUIF xfer loop */
1497
1498 /* done */
1499 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1500 pke_pc_advance(me, 1 + imm*4);
1501 } /* if FIFO full enough */
1502 else
1503 {
1504 /* need to wait for another word */
1505 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1506 /* retry this instruction next clock */
1507 }
1508 }
1509
1510
1511 void
1512 pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
1513 {
1514 /* treat the same as DIRECTH */
1515 pke_code_direct(me, pkecode);
1516 }
1517
1518
1519 void
1520 pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
1521 {
1522 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1523 int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
1524 int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
1525 short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */
1526 short vl = BIT_MASK_GET(cmd, 0, 1);
1527 int m = BIT_MASK_GET(cmd, 4, 4);
1528 short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */
1529 short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
1530 int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */
1531 int sx = BIT_MASK_GET(imm, 14, 14);
1532
1533 int n, num_operands;
1534 unsigned_4* last_operand_word;
1535
1536 /* map zero to max+1 */
1537 if(num==0) num=0x100;
1538
1539 /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
1540 if(wl <= cl)
1541 n = num;
1542 else
1543 n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
1544 num_operands = ((32 >> vl) * (vn+1) * n)/32;
1545
1546 /* confirm that FIFO has enough words in it */
1547 last_operand_word = pke_pc_operand(me, num_operands);
1548 if(last_operand_word != NULL)
1549 {
1550 address_word vu_addr_base;
1551 int vector_num;
1552
1553 /* "transferring" operand */
1554 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1555
1556 /* don't check whether VU is idle */
1557
1558 /* compute VU address base */
1559 if(me->pke_number == 0)
1560 vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
1561 else
1562 {
1563 vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
1564 if(r) vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);
1565 }
1566
1567 /* set NUM */
1568 PKE_REG_MASK_SET(me, NUM, NUM, num);
1569
1570 /* transfer given number of vectors */
1571 vector_num = 0; /* output vector number being processed */
1572 do
1573 {
1574 quadword vu_old_data;
1575 quadword vu_new_data;
1576 quadword unpacked_data;
1577 address_word vu_addr;
1578 unsigned_4 source_addr = 0;
1579 int i;
1580
1581 /* decrement NUM */
1582 PKE_REG_MASK_SET(me, NUM, NUM,
1583 PKE_REG_MASK_GET(me, NUM, NUM) - 1);
1584
1585 /* compute VU destination address, as bytes in R5900 memory */
1586 if(cl >= wl)
1587 {
1588 /* map zero to max+1 */
1589 if(wl == 0) wl = 0x0100;
1590 vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
1591 }
1592 else
1593 vu_addr = vu_addr_base + 16*vector_num;
1594
1595 /* XXX: can vu_addr overflow? */
1596
1597 /* read old VU data word at address */
1598 sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));
1599
1600 /* For cyclic unpack, next operand quadword may come from instruction stream
1601 or be zero. */
1602 if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */
1603 {
1604 /* clear operand - used only in a "indeterminate" state */
1605 for(i = 0; i < 4; i++)
1606 unpacked_data[i] = 0;
1607 }
1608 else
1609 {
1610 /* compute packed vector dimensions */
1611 int vectorbits, unitbits;
1612
1613 if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
1614 {
1615 unitbits = (32 >> vl);
1616 vectorbits = unitbits * (vn+1);
1617 }
1618 else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
1619 {
1620 unitbits = 5;
1621 vectorbits = 16;
1622 }
1623 else /* illegal unpack variant */
1624 {
1625 /* treat as illegal instruction */
1626 pke_code_error(me, pkecode);
1627 return;
1628 }
1629
1630 /* loop over columns */
1631 for(i=0; i<=vn; i++)
1632 {
1633 unsigned_4 operand;
1634
1635 /* offset in bits in current operand word */
1636 int bitoffset =
1637 (vector_num * vectorbits) + (i * unitbits); /* # of bits from PKEcode */
1638
1639 /* last unit of V4_5 is only one bit wide */
1640 if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
1641 unitbits = 1;
1642
1643 /* fetch bitfield operand */
1644 operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr);
1645
1646 /* selectively sign-extend; not for V4_5 1-bit value */
1647 if(sx && unitbits > 0)
1648 unpacked_data[i] = SEXT32(operand, unitbits-1);
1649 else
1650 unpacked_data[i] = operand;
1651 }
1652 } /* unpack word from instruction operand */
1653
1654 /* compute replacement word */
1655 if(m) /* use mask register? */
1656 {
1657 /* compute index into mask register for this word */
1658 int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */
1659
1660 for(i=0; i<3; i++) /* loop over columns */
1661 {
1662 int mask_op = PKE_MASKREG_GET(me, mask_index, i);
1663 unsigned_4* masked_value = NULL;
1664 unsigned_4 zero = 0;
1665
1666 switch(mask_op)
1667 {
1668 case PKE_MASKREG_INPUT:
1669 /* for vn == 0, all columns are copied from column 0 */
1670 if(vn == 0)
1671 masked_value = & unpacked_data[0];
1672 else if(i > vn)
1673 masked_value = & zero; /* arbitrary data: undefined in spec */
1674 else
1675 masked_value = & unpacked_data[i];
1676 break;
1677
1678 case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
1679 masked_value = & me->regs[PKE_REG_R0 + i][0];
1680 break;
1681
1682 case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
1683 masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];
1684 break;
1685
1686 case PKE_MASKREG_NOTHING:
1687 /* "write inhibit" by re-copying old data */
1688 masked_value = & vu_old_data[i];
1689 break;
1690
1691 default:
1692 ASSERT(0);
1693 /* no other cases possible */
1694 }
1695
1696 /* copy masked value for column */
1697 vu_new_data[i] = *masked_value;
1698 } /* loop over columns */
1699 } /* mask */
1700 else
1701 {
1702 /* no mask - just copy over entire unpacked quadword */
1703 memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
1704 }
1705
1706 /* process STMOD register for accumulation operations */
1707 switch(PKE_REG_MASK_GET(me, MODE, MDE))
1708 {
1709 case PKE_MODE_ADDROW: /* add row registers to output data */
1710 for(i=0; i<4; i++)
1711 /* exploit R0..R3 contiguity */
1712 vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
1713 break;
1714
1715 case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
1716 for(i=0; i<4; i++)
1717 {
1718 /* exploit R0..R3 contiguity */
1719 vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
1720 me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
1721 }
1722 break;
1723
1724 case PKE_MODE_INPUT: /* pass data through */
1725 default:
1726 ;
1727 }
1728
1729 /* write replacement word */
1730 pke_track_write(me, vu_new_data, sizeof(vu_new_data),
1731 (SIM_ADDR) vu_addr, source_addr);
1732
1733 /* next vector please */
1734 vector_num ++;
1735 } /* vector transfer loop */
1736 while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);
1737
1738 /* done */
1739 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1740 pke_pc_advance(me, 1 + num_operands);
1741 } /* PKE FIFO full enough */
1742 else
1743 {
1744 /* need to wait for another word */
1745 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1746 /* retry this instruction next clock */
1747 }
1748 }
1749
1750
1751 void
1752 pke_code_error(struct pke_device* me, unsigned_4 pkecode)
1753 {
1754 /* set ER1 flag in STAT register */
1755 PKE_REG_MASK_SET(me, STAT, ER1, 1);
1756 /* advance over faulty word */
1757 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1758 pke_pc_advance(me, 1);
1759 }