* Major endianness fixes on sky code today. The milestone sample and existing
[deliverable/binutils-gdb.git] / sim / mips / sky-pke.c
1 /* Copyright (C) 1998, Cygnus Solutions */
2
3
/* Debugging PKE? */
5 #define PKE_DEBUG
6
7
8 #include <stdlib.h>
9 #include "sky-pke.h"
10 #include "sky-dma.h"
11 #include "sim-bits.h"
12 #include "sim-assert.h"
13 #include "sky-vu0.h"
14 #include "sky-vu1.h"
15 #include "sky-gpuif.h"
16
17
18 /* Imported functions */
19
20 void device_error (device *me, char* message); /* device.c */
21
22
23 /* Internal function declarations */
24
25 static int pke_io_read_buffer(device*, void*, int, address_word,
26 unsigned, sim_cpu*, sim_cia);
27 static int pke_io_write_buffer(device*, const void*, int, address_word,
28 unsigned, sim_cpu*, sim_cia);
29 static void pke_issue(SIM_DESC, struct pke_device*);
30 static void pke_pc_advance(struct pke_device*, int num_words);
31 static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
32 static unsigned_4 pke_pc_operand_bits(struct pke_device*, int bit_offset,
33 int bit_width, unsigned_4* sourceaddr);
34 static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
35 unsigned_4** operand);
36 static void pke_attach(SIM_DESC sd, struct pke_device* me);
37 enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
38 static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
39 static void pke_flip_dbf(struct pke_device* me);
40 /* PKEcode handlers */
41 static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
42 static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
43 static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
44 static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
45 static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
46 static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
47 static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
48 static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
49 static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
50 static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
51 static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
52 static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
53 static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
54 static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
55 static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
56 static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
57 static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
58 static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
59 static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
60 static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
61 static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
62 static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);
63
64
65
66 /* Static data */
67
/* The PKE0 (VIF0) device instance.  Fields after the embedded device
   struct are positional; see struct pke_device in sky-pke.h for the
   field order.  The `{}' initializers zero whole sub-arrays (GNU C
   empty-initializer extension).  */
struct pke_device pke0_device =
{
  { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  0, 0,             /* ID, flags */
  {},               /* regs */
  {}, 0,            /* FIFO write buffer */
  NULL, 0, 0, NULL, /* FIFO */
  0, 0              /* pc */
};
77
78
/* The PKE1 (VIF1) device instance; mirrors pke0_device with ID 1.  */
struct pke_device pke1_device =
{
  { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  1, 0,             /* ID, flags */
  {},               /* regs */
  {}, 0,            /* FIFO write buffer */
  NULL, 0, 0, NULL, /* FIFO */
  0, 0              /* pc */
};
88
89
90
91 /* External functions */
92
93
94 /* Attach PKE addresses to main memory */
95
96 void
97 pke0_attach(SIM_DESC sd)
98 {
99 pke_attach(sd, & pke0_device);
100 }
101
102 void
103 pke1_attach(SIM_DESC sd)
104 {
105 pke_attach(sd, & pke1_device);
106 }
107
108
109
110 /* Issue a PKE instruction if possible */
111
112 void
113 pke0_issue(SIM_DESC sd)
114 {
115 pke_issue(sd, & pke0_device);
116 }
117
118 void
119 pke1_issue(SIM_DESC sd)
120 {
121 pke_issue(sd, & pke1_device);
122 }
123
124
125
126 /* Internal functions */
127
128
129 /* Attach PKE memory regions to simulator */
130
/* Attach one PKE unit's memory regions to the simulator core: the
   register window, the FIFO write port, and the two VU source-address
   tracking tables.  Also opens an optional FIFO trace file named by
   the VIF<n>_TRACE_FILE environment variable.  */
void
pke_attach(SIM_DESC sd, struct pke_device* me)
{
  /* register file */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
                   PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* FIFO port */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
                   sizeof(quadword) /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* VU MEM0 tracking table */
  /* NOTE(review): MEM0 table is sized at half the unit size, MEM1 at a
     quarter -- presumably one tracking word per MEM unit; confirm the
     ratios against the *_SRCADDR_* definitions in sky-vu*.h.  */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START),
                   ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2,
                   0 /*modulo*/,
                   NULL,
                   NULL /*buffer*/);

  /* VU MEM1 tracking table */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START),
                   ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4,
                   0 /*modulo*/,
                   NULL,
                   NULL /*buffer*/);


  /* attach to trace file if appropriate */
  {
    char trace_envvar[80];   /* ample room for "VIF%d_TRACE_FILE" */
    char* trace_filename = NULL;
    sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number);
    trace_filename = getenv(trace_envvar);
    if(trace_filename != NULL)
      {
        me->fifo_trace_file = fopen(trace_filename, "w");
        if(me->fifo_trace_file == NULL)
          perror("VIF FIFO trace error on fopen");
        else
          /* line-buffer the trace so records appear promptly */
          setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0);
      }
  }
}
183
184
185
186 /* Handle a PKE read; return no. of bytes read */
187
/* Handle a CPU read from PKE memory-mapped space (register window or
   FIFO port).  Returns the number of bytes transferred, or 0 for an
   invalid access that spans a quadword boundary.  Reads are
   side-effect free; write-only or wrong-unit registers read as 0.  */
int
pke_io_read_buffer(device *me_,
                   void *dest,
                   int space,
                   address_word addr,
                   unsigned nr_bytes,
                   sim_cpu *cpu,
                   sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank: one 16-byte slot per register */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);      /* find byte-offset inside register bank */
      int readable = 1;
      quadword result;

      /* clear result */
      result[0] = result[1] = result[2] = result[3] = 0;

      /* handle reads to individual registers; clear `readable' on error */
      switch(reg_num)
        {
          /* handle common case of register reading, side-effect free */
          /* PKE1-only registers: read as zero on PKE0 */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            readable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
        case PKE_REG_ERR:
        case PKE_REG_MARK:
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          /* only the low 32 bits of each register slot hold data;
             convert host -> target endianness */
          result[0] = H2T_4(me->regs[reg_num][0]);
          break;

          /* handle common case of write-only registers */
        case PKE_REG_FBRST:
          readable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform transfer & return */
      if(readable)
        {
          /* copy the bits */
          memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
          /* okay */
        }
      else
        {
          /* return zero bits */
          memset(dest, 0, nr_bytes);
        }

      return nr_bytes;
      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */

      /* FIFO is not readable: return a word of zeroes */
      memset(dest, 0, nr_bytes);
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}
297
298
/* Handle a PKE write; return no. of bytes written */
300
int
pke_io_write_buffer(device *me_,
                    const void *src,
                    int space,
                    address_word addr,
                    unsigned nr_bytes,
                    sim_cpu *cpu,
                    sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank: one 16-byte slot per register */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);      /* find byte-offset inside register bank */
      int writeable = 1;
      quadword input;

      /* clear input */
      input[0] = input[1] = input[2] = input[3] = 0;

      /* write user-given bytes into input */
      memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);

      /* make words host-endian */
      input[0] = T2H_4(input[0]);
      /* we may ignore other words: only the low word of a register slot
         carries data */

      /* handle writes to individual registers; clear `writeable' on error */
      switch(reg_num)
        {
        case PKE_REG_FBRST:
          /* Order these tests from least to most overriding, in case
             multiple bits are set. */
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E))
            {
              /* STC: clear a bunch of status bits */
              PKE_REG_MASK_SET(me, STAT, PSS, 0);
              PKE_REG_MASK_SET(me, STAT, PFS, 0);
              PKE_REG_MASK_SET(me, STAT, PIS, 0);
              PKE_REG_MASK_SET(me, STAT, INT, 0);
              PKE_REG_MASK_SET(me, STAT, ER0, 0);
              PKE_REG_MASK_SET(me, STAT, ER1, 0);
              me->flags &= ~PKE_FLAG_PENDING_PSS;
              /* will allow resumption of possible stalled instruction */
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E))
            {
              /* STP: stall is deferred until the unit next goes idle;
                 see pke_issue() */
              me->flags |= PKE_FLAG_PENDING_PSS;
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E))
            {
              /* FBK: force-break */
              PKE_REG_MASK_SET(me, STAT, PFS, 1);
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E))
            {
              /* RST: clear FIFO by skipping to word after PC: also
                 prevents re-execution attempt of possible stalled
                 instruction */
              me->fifo_num_elements = me->fifo_pc;
              /* clear registers, flag, other state */
              memset(me->regs, 0, sizeof(me->regs));
              me->fifo_qw_done = 0;
              me->flags = 0;
              me->qw_pc = 0;
            }
          break;

        case PKE_REG_ERR:
          /* copy bottom three bits (mask-enable bits) */
          BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
          break;

        case PKE_REG_MARK:
          /* copy bottom sixteen bits */
          PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
          /* reset MRK bit in STAT */
          PKE_REG_MASK_SET(me, STAT, MRK, 0);
          break;

          /* handle common case of read-only registers */
          /* PKE1-only registers - not really necessary to handle separately */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            writeable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
          /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          writeable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform return */
      if(! writeable)
        {
          ; /* error: write to read-only register is silently dropped */
        }

      return nr_bytes;

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */
      struct fifo_quadword* fqw;
      int fifo_byte = ADDR_OFFSET_QW(addr);     /* find byte-offset inside fifo quadword */
      unsigned_4 dma_tag_present = 0;
      int i;

      /* collect potentially-partial quadword in write buffer; LE byte order */
      memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
      /* mark bytes written: one bit of fifo_qw_done per byte */
      for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
        BIT_MASK_SET(me->fifo_qw_done, i, i, 1);

      /* return if quadword not quite written yet */
      if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) !=
         BIT_MASK_BTW(0, sizeof(quadword)-1))
        return nr_bytes;

      /* all done - process quadword after clearing flag */
      BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0);

      /* ensure FIFO has enough elements */
      if(me->fifo_num_elements == me->fifo_buffer_size)
        {
          /* time to grow; enlarge in fixed 20-quadword increments */
          int new_fifo_buffer_size = me->fifo_buffer_size + 20;
          void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(struct fifo_quadword));

          if(ptr == NULL)
            {
              /* oops, cannot enlarge FIFO any more */
              device_error(me_, "Cannot enlarge FIFO buffer\n");
              return 0;
            }

          me->fifo = ptr;
          me->fifo_buffer_size = new_fifo_buffer_size;
        }

      /* add new quadword at end of FIFO; store data in host-endian */
      fqw = & me->fifo[me->fifo_num_elements];
      fqw->word_class[0] = fqw->word_class[1] =
        fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
      fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]);
      fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]);
      fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]);
      fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]);
      ASSERT(sizeof(unsigned_4) == 4);
      /* ask the DMA unit which source address produced this quadword */
      PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
                   & fqw->source_address, /* converted to host-endian */
                   4);
      PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
                   & dma_tag_present,
                   4);

      if(dma_tag_present)
        {
          /* lower two words are DMA tags */
          fqw->word_class[0] = fqw->word_class[1] = wc_dma;
        }

      me->fifo_num_elements++;

      /* set FQC to "1" as FIFO is now not empty */
      PKE_REG_MASK_SET(me, STAT, FQC, 1);

      /* okay */
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}
516
517
518
519 /* Issue & swallow next PKE opcode if possible/available */
520
/* Attempt to issue and swallow the next PKE opcode.  Called once per
   simulation cycle per unit.  Returns without state change when the
   unit is stalled (PFS/PSS, or unmasked ER0/ER1/PIS) or when no
   instruction word is available in the FIFO.  */
void
pke_issue(SIM_DESC sd, struct pke_device* me)
{
  struct fifo_quadword* fqw;
  unsigned_4 fw;
  unsigned_4 cmd, intr, num;
  unsigned_4 imm;

  /* 1 -- test go / no-go for PKE execution */

  /* switch on STAT:PSS if PSS-pending and in idle state */
  if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) &&
     (me->flags & PKE_FLAG_PENDING_PSS) != 0)
    {
      me->flags &= ~PKE_FLAG_PENDING_PSS;
      PKE_REG_MASK_SET(me, STAT, PSS, 1);
    }

  /* check for stall/halt control bits */
  if(PKE_REG_MASK_GET(me, STAT, PFS) ||
     PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */
     /* PEW bit not a reason to keep stalling - it's re-checked below */
     /* PGW bit not a reason to keep stalling - it's re-checked below */
     /* maskable stall controls: ER0, ER1, PIS */
     (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
     (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
     (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
    {
      /* try again next cycle; no state change */
      return;
    }

  /* confirm availability of new quadword of PKE instructions */
  if(me->fifo_num_elements <= me->fifo_pc)
    return;


  /* 2 -- fetch PKE instruction */

  /* skip over DMA tag, if present */
  pke_pc_advance(me, 0);

  /* "fetch" instruction quadword and word */
  fqw = & me->fifo[me->fifo_pc];
  fw = fqw->data[me->qw_pc];

  /* store word in PKECODE register */
  me->regs[PKE_REG_CODE][0] = fw;


  /* 3 -- decode PKE instruction */

  /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
     so op-code is in top byte. */
  intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
  cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* handle interrupts */
  if(intr)
    {
      /* are we resuming an interrupt-flagged instruction? */
      if(me->flags & PKE_FLAG_INT_NOLOOP)
        {
          /* clear loop-prevention flag */
          me->flags &= ~PKE_FLAG_INT_NOLOOP;
          /* mask interrupt bit from instruction word so re-decoded instructions don't stall */
          BIT_MASK_SET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E, 0);
        }
      else /* new interrupt-flagged instruction */
        {
          /* set INT flag in STAT register */
          PKE_REG_MASK_SET(me, STAT, INT, 1);
          /* set loop-prevention flag */
          me->flags |= PKE_FLAG_INT_NOLOOP;

          /* XXX: send interrupt to 5900? */
        }
    }

  /* decoding */
  if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE)
    PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);

  /* decode & execute; handlers themselves advance the PC and update
     the PPS state.  PKE1-only opcodes are guarded by pke_number and
     fall to pke_code_error() on PKE0.  */
  if(IS_PKE_CMD(cmd, PKENOP))
    pke_code_nop(me, fw);
  else if(IS_PKE_CMD(cmd, STCYCL))
    pke_code_stcycl(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
    pke_code_offset(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
    pke_code_base(me, fw);
  else if(IS_PKE_CMD(cmd, ITOP))
    pke_code_itop(me, fw);
  else if(IS_PKE_CMD(cmd, STMOD))
    pke_code_stmod(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
    pke_code_mskpath3(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMARK))
    pke_code_pkemark(me, fw);
  else if(IS_PKE_CMD(cmd, FLUSHE))
    pke_code_flushe(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
    pke_code_flush(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
    pke_code_flusha(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCAL))
    pke_code_pkemscal(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCNT))
    pke_code_pkemscnt(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
    pke_code_pkemscalf(me, fw);
  else if(IS_PKE_CMD(cmd, STMASK))
    pke_code_stmask(me, fw);
  else if(IS_PKE_CMD(cmd, STROW))
    pke_code_strow(me, fw);
  else if(IS_PKE_CMD(cmd, STCOL))
    pke_code_stcol(me, fw);
  else if(IS_PKE_CMD(cmd, MPG))
    pke_code_mpg(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECT))
    pke_code_direct(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECTHL))
    pke_code_directhl(me, fw);
  else if(IS_PKE_CMD(cmd, UNPACK))
    pke_code_unpack(me, fw);
  /* ... no other commands ... */
  else
    pke_code_error(me, fw);
}
653
654
655
656 /* advance the PC by given number of data words; update STAT/FQC
657 field; assume FIFO is filled enough; classify passed-over words;
658 write FIFO trace line */
659
/* Advance the PC over `num_words' data words, updating the STAT:FQC
   field and classifying the words passed over.  DMA tag words found in
   the stream are skipped for free: each one bumps `num' so the loop
   runs an extra iteration without consuming the caller's budget.
   Calling with num_words == 0 therefore just skips any DMA tag at the
   current PC.  Assumes the FIFO is filled far enough; writes one FIFO
   trace line per fully-consumed quadword when tracing is enabled.  */
void
pke_pc_advance(struct pke_device* me, int num_words)
{
  int num = num_words;
  struct fifo_quadword* fq = NULL;
  ASSERT(num_words >= 0);

  /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */

  while(1)
    {
      fq = & me->fifo[me->fifo_pc];

      /* skip over DMA tag words if present in word 0 or 1 */
      if(fq->word_class[me->qw_pc] == wc_dma)
        {
          /* skip by going around loop an extra time */
          num ++;
        }

      /* nothing left to skip / no DMA tag here */
      if(num == 0)
        break;

      /* one word skipped */
      num --;

      /* point to next word */
      me->qw_pc ++;
      if(me->qw_pc == 4)
        {
          me->qw_pc = 0;
          me->fifo_pc ++;

          /* trace the consumption of the FIFO quadword we just skipped over */
          /* fq still points to it */
          if(me->fifo_trace_file != NULL)
            {
              /* assert complete classification */
              ASSERT(fq->word_class[3] != wc_unknown);
              ASSERT(fq->word_class[2] != wc_unknown);
              ASSERT(fq->word_class[1] != wc_unknown);
              ASSERT(fq->word_class[0] != wc_unknown);

              /* print trace record */
              fprintf(me->fifo_trace_file,
                      "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
                      (me->pke_number == 0 ? 0 : 1),
                      (unsigned) fq->data[3], (unsigned) fq->data[2],
                      (unsigned) fq->data[1], (unsigned) fq->data[0],
                      (unsigned) fq->source_address,
                      fq->word_class[3], fq->word_class[2],
                      fq->word_class[1], fq->word_class[0]);
            }

          /* XXX: zap old entries in FIFO */
        } /* next quadword */
    }

  /* clear FQC if FIFO is now empty */
  if(me->fifo_num_elements == me->fifo_pc)
    {
      PKE_REG_MASK_SET(me, STAT, FQC, 0);
    }
  else /* annotate the word where the PC lands as a PKEcode */
    {
      fq = & me->fifo[me->fifo_pc];
      ASSERT(fq->word_class[me->qw_pc] == wc_pkecode ||
             fq->word_class[me->qw_pc] == wc_unknown);
      fq->word_class[me->qw_pc] = wc_pkecode;
    }
}
732
733
734
735 /* Return pointer to FIFO quadword containing given operand# in FIFO.
736 `operand_num' starts at 1. Return pointer to operand word in last
737 argument, if non-NULL. If FIFO is not full enough, return 0.
738 Signal an ER0 indication upon skipping a DMA tag. */
739
/* Return pointer to FIFO quadword containing given operand# in FIFO.
   `operand_num' starts at 1 (operand 0 would be the PKEcode itself).
   Return pointer to operand word in last argument, if non-NULL.  If
   FIFO is not full enough, return NULL without touching *operand.
   Signals an ER0 indication upon skipping a DMA tag.  Does not move
   the PC -- works on snapshot copies of fifo_pc/qw_pc.  */
struct fifo_quadword*
pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
{
  int num = operand_num;
  int new_qw_pc, new_fifo_pc;
  struct fifo_quadword* fq = NULL;

  ASSERT(num > 0);

  /* snapshot current pointers */
  new_fifo_pc = me->fifo_pc;
  new_qw_pc = me->qw_pc;

  /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */

  do
    {
      /* one word skipped */
      num --;

      /* point to next word */
      new_qw_pc ++;
      if(new_qw_pc == 4)
        {
          new_qw_pc = 0;
          new_fifo_pc ++;
        }

      /* check for FIFO underflow */
      if(me->fifo_num_elements == new_fifo_pc)
        {
          fq = NULL;
          break;
        }

      /* skip over DMA tag words if present in word 0 or 1 */
      fq = & me->fifo[new_fifo_pc];
      if(fq->word_class[new_qw_pc] == wc_dma)
        {
          /* mismatch error!  data expected where a DMA tag sits */
          PKE_REG_MASK_SET(me, STAT, ER0, 1);
          /* skip by going around loop an extra time */
          num ++;
        }
    }
  while(num > 0);

  /* return pointer to operand word itself */
  if(fq != NULL)
    {
      *operand = & fq->data[new_qw_pc];

      /* annotate the word where the pseudo-PC lands as a PKE operand */
      ASSERT(fq->word_class[new_qw_pc] == wc_pkedata ||
             fq->word_class[new_qw_pc] == wc_unknown);
      fq->word_class[new_qw_pc] = wc_pkedata;
    }

  return fq;
}
800
801
802 /* Return pointer to given operand# in FIFO. `operand_num' starts at 1.
803 If FIFO is not full enough, return 0. Skip over DMA tags, but mark
804 them as an error (ER0). */
805
806 unsigned_4*
807 pke_pc_operand(struct pke_device* me, int operand_num)
808 {
809 unsigned_4* operand = NULL;
810 struct fifo_quadword* fifo_operand;
811
812 fifo_operand = pke_pc_fifo(me, operand_num, & operand);
813
814 if(fifo_operand == NULL)
815 ASSERT(operand == NULL); /* pke_pc_fifo() ought leave it untouched */
816
817 return operand;
818 }
819
820
821 /* Return a bit-field extract of given operand# in FIFO, and its
822 source-addr. `bit_offset' starts at 0, referring to LSB after PKE
823 instruction word. Width must be >0, <=32. Assume FIFO is full
824 enough. Skip over DMA tags, but mark them as an error (ER0). */
825
826 unsigned_4
827 pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr)
828 {
829 unsigned_4* word = NULL;
830 unsigned_4 value;
831 struct fifo_quadword* fifo_operand;
832 int wordnumber, bitnumber;
833
834 wordnumber = bit_offset/32;
835 bitnumber = bit_offset%32;
836
837 /* find operand word with bitfield */
838 fifo_operand = pke_pc_fifo(me, wordnumber + 1, &word);
839 ASSERT(word != NULL);
840
841 /* extract bitfield from word */
842 value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1);
843
844 /* extract source addr from fifo word */
845 *source_addr = fifo_operand->source_address;
846
847 return value;
848 }
849
850
851
852 /* check for stall conditions on indicated devices (path* only on
853 PKE1), do not change status; return 0 iff no stall */
854 int
855 pke_check_stall(struct pke_device* me, enum pke_check_target what)
856 {
857 int any_stall = 0;
858 unsigned_4 cop2_stat, gpuif_stat;
859
860 /* read status words */
861 ASSERT(sizeof(unsigned_4) == 4);
862 PKE_MEM_READ(me, (GIF_REG_STAT),
863 & gpuif_stat,
864 4);
865 PKE_MEM_READ(me, (COP2_REG_STAT_ADDR),
866 & cop2_stat,
867 4);
868
869 /* perform checks */
870 if(what == chk_vu)
871 {
872 if(me->pke_number == 0)
873 any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E);
874 else /* if(me->pke_number == 1) */
875 any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E);
876 }
877 else if(what == chk_path1) /* VU -> GPUIF */
878 {
879 if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1)
880 any_stall = 1;
881 }
882 else if(what == chk_path2) /* PKE -> GPUIF */
883 {
884 if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2)
885 any_stall = 1;
886 }
887 else if(what == chk_path3) /* DMA -> GPUIF */
888 {
889 if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3)
890 any_stall = 1;
891 }
892 else
893 {
894 /* invalid what */
895 ASSERT(0);
896 }
897
898 /* any stall reasons? */
899 return any_stall;
900 }
901
902
903 /* PKE1 only: flip the DBF bit; recompute TOPS, TOP */
void
pke_flip_dbf(struct pke_device* me)
{
  /* compute new TOP: latch the TOPS value computed at the last flip */
  PKE_REG_MASK_SET(me, TOP, TOP,
                   PKE_REG_MASK_GET(me, TOPS, TOPS));
  /* flip DBF */
  PKE_REG_MASK_SET(me, DBF, DF,
                   PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
  /* mirror the new DBF value into the STAT register copy */
  PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
  /* compute new TOPS from the freshly-flipped DBF */
  PKE_REG_MASK_SET(me, TOPS, TOPS,
                   (PKE_REG_MASK_GET(me, BASE, BASE) +
                    (PKE_REG_MASK_GET(me, DBF, DF) *
                     PKE_REG_MASK_GET(me, OFST, OFFSET))));
  /* this is equivalent to last word from okadaa (98-02-25):
     1) TOP=TOPS;
     2) TOPS=BASE + !DBF*OFFSET
     3) DBF=!DBF */
}
924
925
926
927 /* PKEcode handler functions -- responsible for checking and
928 confirming old stall conditions, executing pkecode, updating PC and
929 status registers -- may assume being run on correct PKE unit */
930
931 void
932 pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
933 {
934 /* handle interrupts */
935 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
936 {
937 PKE_REG_MASK_SET(me, STAT, PIS, 1);
938 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
939 return;
940 }
941
942 /* done */
943 pke_pc_advance(me, 1);
944 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
945 }
946
947
948 void
949 pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
950 {
951 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
952
953 /* handle interrupts */
954 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
955 {
956 PKE_REG_MASK_SET(me, STAT, PIS, 1);
957 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
958 return;
959 }
960
961 /* copy immediate value into CYCLE reg */
962 PKE_REG_MASK_SET(me, CYCLE, WL, BIT_MASK_GET(imm, 8, 15));
963 PKE_REG_MASK_SET(me, CYCLE, CL, BIT_MASK_GET(imm, 0, 7));
964 /* done */
965 pke_pc_advance(me, 1);
966 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
967 }
968
969
970 void
971 pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
972 {
973 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
974
975 /* handle interrupts */
976 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
977 {
978 PKE_REG_MASK_SET(me, STAT, PIS, 1);
979 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
980 return;
981 }
982
983 /* copy 10 bits to OFFSET field */
984 PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
985 /* clear DBF bit */
986 PKE_REG_MASK_SET(me, DBF, DF, 0);
987 /* clear other DBF bit */
988 PKE_REG_MASK_SET(me, STAT, DBF, 0);
989 /* set TOPS = BASE */
990 PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
991 /* done */
992 pke_pc_advance(me, 1);
993 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
994 }
995
996
997 void
998 pke_code_base(struct pke_device* me, unsigned_4 pkecode)
999 {
1000 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1001
1002 /* handle interrupts */
1003 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1004 {
1005 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1006 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1007 return;
1008 }
1009
1010 /* copy 10 bits to BASE field */
1011 PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
1012 /* done */
1013 pke_pc_advance(me, 1);
1014 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1015 }
1016
1017
1018 void
1019 pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
1020 {
1021 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1022
1023 /* handle interrupts */
1024 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1025 {
1026 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1027 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1028 return;
1029 }
1030
1031 /* copy 10 bits to ITOPS field */
1032 PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
1033 /* done */
1034 pke_pc_advance(me, 1);
1035 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1036 }
1037
1038
/* STMOD: load the addition-decompression mode into MODE.MDE.  */
void
pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* handle interrupts */
  if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
    {
      PKE_REG_MASK_SET(me, STAT, PIS, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      return;
    }

  /* copy mode bits to MODE register.  NOTE(review): this mask extracts
     bits 0..2 (three bits) even though the mode field is usually
     described as two bits wide -- confirm against the PKE_REG_MODE_MDE
     field definition in sky-pke.h.  */
  PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}
1058
1059
1060 void
1061 pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
1062 {
1063 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1064 unsigned_4 gif_mode;
1065
1066 /* handle interrupts */
1067 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1068 {
1069 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1070 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1071 return;
1072 }
1073
1074 /* set appropriate bit */
1075 if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0)
1076 gif_mode = GIF_REG_MODE_M3R_MASK;
1077 else
1078 gif_mode = 0;
1079
1080 /* write register; patrickm code will look at M3R bit only */
1081 PKE_MEM_WRITE(me, GIF_REG_MODE, & gif_mode, 4);
1082
1083 /* done */
1084 pke_pc_advance(me, 1);
1085 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1086 }
1087
1088
1089 void
1090 pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
1091 {
1092 /* ignore possible interrupt stall */
1093
1094 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1095 /* copy 16 bits to MARK register */
1096 PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
1097 /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
1098 PKE_REG_MASK_SET(me, STAT, MRK, 1);
1099 /* done */
1100 pke_pc_advance(me, 1);
1101 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1102 }
1103
1104
1105 void
1106 pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
1107 {
1108 /* handle interrupts */
1109 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1110 {
1111 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1112 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1113 return;
1114 }
1115
1116 /* compute next PEW bit */
1117 if(pke_check_stall(me, chk_vu))
1118 {
1119 /* VU busy */
1120 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1121 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1122 /* try again next cycle */
1123 }
1124 else
1125 {
1126 /* VU idle */
1127 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1128 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1129 pke_pc_advance(me, 1);
1130 }
1131 }
1132
1133
1134 void
1135 pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
1136 {
1137 int something_busy = 0;
1138
1139 /* handle interrupts */
1140 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1141 {
1142 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1143 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1144 return;
1145 }
1146
1147 /* compute next PEW, PGW bits */
1148 if(pke_check_stall(me, chk_vu))
1149 {
1150 something_busy = 1;
1151 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1152 }
1153 else
1154 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1155
1156
1157 if(pke_check_stall(me, chk_path1) ||
1158 pke_check_stall(me, chk_path2))
1159 {
1160 something_busy = 1;
1161 PKE_REG_MASK_SET(me, STAT, PGW, 1);
1162 }
1163 else
1164 PKE_REG_MASK_SET(me, STAT, PGW, 0);
1165
1166 /* go or no go */
1167 if(something_busy)
1168 {
1169 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1170 /* try again next cycle */
1171 }
1172 else
1173 {
1174 /* all idle */
1175 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1176 pke_pc_advance(me, 1);
1177 }
1178 }
1179
1180
1181 void
1182 pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
1183 {
1184 int something_busy = 0;
1185
1186 /* handle interrupts */
1187 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1188 {
1189 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1190 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1191 return;
1192 }
1193
1194 /* compute next PEW, PGW bits */
1195 if(pke_check_stall(me, chk_vu))
1196 {
1197 something_busy = 1;
1198 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1199 }
1200 else
1201 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1202
1203
1204 if(pke_check_stall(me, chk_path1) ||
1205 pke_check_stall(me, chk_path2) ||
1206 pke_check_stall(me, chk_path3))
1207 {
1208 something_busy = 1;
1209 PKE_REG_MASK_SET(me, STAT, PGW, 1);
1210 }
1211 else
1212 PKE_REG_MASK_SET(me, STAT, PGW, 0);
1213
1214 if(something_busy)
1215 {
1216 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1217 /* try again next cycle */
1218 }
1219 else
1220 {
1221 /* all idle */
1222 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1223 pke_pc_advance(me, 1);
1224 }
1225 }
1226
1227
1228 void
1229 pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
1230 {
1231 /* handle interrupts */
1232 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1233 {
1234 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1235 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1236 return;
1237 }
1238
1239 /* compute next PEW bit */
1240 if(pke_check_stall(me, chk_vu))
1241 {
1242 /* VU busy */
1243 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1244 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1245 /* try again next cycle */
1246 }
1247 else
1248 {
1249 unsigned_4 vu_pc;
1250 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1251
1252 /* VU idle */
1253 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1254
1255 /* flip DBF on PKE1 */
1256 if(me->pke_number == 1)
1257 pke_flip_dbf(me);
1258
1259 /* compute new PC for VU (host byte-order) */
1260 vu_pc = BIT_MASK_GET(imm, 0, 15);
1261 vu_pc = T2H_4(vu_pc);
1262
1263 /* write new PC; callback function gets VU running */
1264 ASSERT(sizeof(unsigned_4) == 4);
1265 PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1266 & vu_pc,
1267 4);
1268
1269 /* copy ITOPS field to ITOP */
1270 PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS));
1271
1272 /* done */
1273 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1274 pke_pc_advance(me, 1);
1275 }
1276 }
1277
1278
1279
1280 void
1281 pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
1282 {
1283 /* handle interrupts */
1284 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1285 {
1286 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1287 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1288 return;
1289 }
1290
1291 /* compute next PEW bit */
1292 if(pke_check_stall(me, chk_vu))
1293 {
1294 /* VU busy */
1295 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1296 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1297 /* try again next cycle */
1298 }
1299 else
1300 {
1301 unsigned_4 vu_pc;
1302
1303 /* VU idle */
1304 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1305
1306 /* flip DBF on PKE1 */
1307 if(me->pke_number == 1)
1308 pke_flip_dbf(me);
1309
1310 /* read old PC */
1311 ASSERT(sizeof(unsigned_4) == 4);
1312 PKE_MEM_READ(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1313 & vu_pc,
1314 4);
1315
1316 /* rewrite new PC; callback function gets VU running */
1317 ASSERT(sizeof(unsigned_4) == 4);
1318 PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1319 & vu_pc,
1320 4);
1321
1322 /* copy ITOPS field to ITOP */
1323 PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS));
1324
1325 /* done */
1326 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1327 pke_pc_advance(me, 1);
1328 }
1329 }
1330
1331
1332 void
1333 pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
1334 {
1335 int something_busy = 0;
1336
1337 /* handle interrupts */
1338 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1339 {
1340 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1341 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1342 return;
1343 }
1344
1345 /* compute next PEW, PGW bits */
1346 if(pke_check_stall(me, chk_vu))
1347 {
1348 something_busy = 1;
1349 PKE_REG_MASK_SET(me, STAT, PEW, 1);
1350 }
1351 else
1352 PKE_REG_MASK_SET(me, STAT, PEW, 0);
1353
1354
1355 if(pke_check_stall(me, chk_path1) ||
1356 pke_check_stall(me, chk_path2) ||
1357 pke_check_stall(me, chk_path3))
1358 {
1359 something_busy = 1;
1360 PKE_REG_MASK_SET(me, STAT, PGW, 1);
1361 }
1362 else
1363 PKE_REG_MASK_SET(me, STAT, PGW, 0);
1364
1365 /* go or no go */
1366 if(something_busy)
1367 {
1368 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1369 /* try again next cycle */
1370 }
1371 else
1372 {
1373 unsigned_4 vu_pc;
1374 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1375
1376 /* flip DBF on PKE1 */
1377 if(me->pke_number == 1)
1378 pke_flip_dbf(me);
1379
1380 /* compute new PC for VU (host byte-order) */
1381 vu_pc = BIT_MASK_GET(imm, 0, 15);
1382 vu_pc = T2H_4(vu_pc);
1383
1384 /* rewrite new PC; callback function gets VU running */
1385 ASSERT(sizeof(unsigned_4) == 4);
1386 PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
1387 & vu_pc,
1388 4);
1389
1390 /* copy ITOPS field to ITOP */
1391 PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS));
1392
1393 /* done */
1394 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1395 pke_pc_advance(me, 1);
1396 }
1397 }
1398
1399
1400 void
1401 pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
1402 {
1403 unsigned_4* mask;
1404
1405 /* handle interrupts */
1406 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1407 {
1408 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1409 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1410 return;
1411 }
1412
1413 /* check that FIFO has one more word for STMASK operand */
1414 mask = pke_pc_operand(me, 1);
1415 if(mask != NULL)
1416 {
1417 /* "transferring" operand */
1418 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1419
1420 /* set NUM */
1421 PKE_REG_MASK_SET(me, NUM, NUM, 1);
1422
1423 /* fill the register */
1424 PKE_REG_MASK_SET(me, MASK, MASK, *mask);
1425
1426 /* set NUM */
1427 PKE_REG_MASK_SET(me, NUM, NUM, 0);
1428
1429 /* done */
1430 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1431 pke_pc_advance(me, 2);
1432 }
1433 else
1434 {
1435 /* need to wait for another word */
1436 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1437 /* try again next cycle */
1438 }
1439 }
1440
1441
1442 void
1443 pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
1444 {
1445 /* check that FIFO has four more words for STROW operand */
1446 unsigned_4* last_op;
1447
1448 /* handle interrupts */
1449 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1450 {
1451 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1452 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1453 return;
1454 }
1455
1456 last_op = pke_pc_operand(me, 4);
1457 if(last_op != NULL)
1458 {
1459 /* "transferring" operand */
1460 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1461
1462 /* set NUM */
1463 PKE_REG_MASK_SET(me, NUM, NUM, 1);
1464
1465 /* copy ROW registers: must all exist if 4th operand exists */
1466 me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
1467 me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
1468 me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
1469 me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);
1470
1471 /* set NUM */
1472 PKE_REG_MASK_SET(me, NUM, NUM, 0);
1473
1474 /* done */
1475 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1476 pke_pc_advance(me, 5);
1477 }
1478 else
1479 {
1480 /* need to wait for another word */
1481 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1482 /* try again next cycle */
1483 }
1484 }
1485
1486
1487 void
1488 pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
1489 {
1490 /* check that FIFO has four more words for STCOL operand */
1491 unsigned_4* last_op;
1492
1493 /* handle interrupts */
1494 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1495 {
1496 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1497 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1498 return;
1499 }
1500
1501 last_op = pke_pc_operand(me, 4);
1502 if(last_op != NULL)
1503 {
1504 /* "transferring" operand */
1505 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1506
1507 /* set NUM */
1508 PKE_REG_MASK_SET(me, NUM, NUM, 1);
1509
1510 /* copy COL registers: must all exist if 4th operand exists */
1511 me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
1512 me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
1513 me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
1514 me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);
1515
1516 /* set NUM */
1517 PKE_REG_MASK_SET(me, NUM, NUM, 0);
1518
1519 /* done */
1520 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1521 pke_pc_advance(me, 5);
1522 }
1523 else
1524 {
1525 /* need to wait for another word */
1526 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1527 /* try again next cycle */
1528 }
1529 }
1530
1531
1532 void
1533 pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
1534 {
1535 unsigned_4* last_mpg_word;
1536 int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
1537 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1538
1539 /* assert 64-bit alignment of MPG operand */
1540 if(me->qw_pc != 3 && me->qw_pc != 1)
1541 return pke_code_error(me, pkecode);
1542
1543 /* handle interrupts */
1544 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1545 {
1546 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1547 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1548 return;
1549 }
1550
1551 /* map zero to max+1 */
1552 if(num==0) num=0x100;
1553
1554 /* check that FIFO has a few more words for MPG operand */
1555 last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
1556 if(last_mpg_word != NULL)
1557 {
1558 /* perform implied FLUSHE */
1559 if(pke_check_stall(me, chk_vu))
1560 {
1561 /* VU busy */
1562 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1563 /* retry this instruction next clock */
1564 }
1565 else
1566 {
1567 /* VU idle */
1568 int i;
1569
1570 /* "transferring" operand */
1571 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1572
1573 /* set NUM */
1574 PKE_REG_MASK_SET(me, NUM, NUM, num);
1575
1576 /* transfer VU instructions, one word-pair per iteration */
1577 for(i=0; i<num; i++)
1578 {
1579 address_word vu_addr_base, vu_addr;
1580 address_word vutrack_addr_base, vutrack_addr;
1581 address_word vu_addr_max_size;
1582 unsigned_4 vu_lower_opcode, vu_upper_opcode;
1583 unsigned_4* operand;
1584 struct fifo_quadword* fq;
1585 int next_num;
1586
1587 /* decrement NUM */
1588 next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
1589 PKE_REG_MASK_SET(me, NUM, NUM, next_num);
1590
1591 /* imm: in 64-bit units for MPG instruction */
1592 /* VU*_MEM0 : instruction memory */
1593 vu_addr_base = (me->pke_number == 0) ?
1594 VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START;
1595 vu_addr_max_size = (me->pke_number == 0) ?
1596 VU0_MEM0_SIZE : VU1_MEM0_SIZE;
1597 vutrack_addr_base = (me->pke_number == 0) ?
1598 VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
1599
1600 /* compute VU address for this word-pair */
1601 vu_addr = vu_addr_base + (imm + i) * 8;
1602 /* check for vu_addr overflow */
1603 while(vu_addr >= vu_addr_base + vu_addr_max_size)
1604 vu_addr -= vu_addr_max_size;
1605
1606 /* compute VU tracking address */
1607 vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2;
1608
1609 /* Fetch operand words; assume they are already little-endian for VU imem */
1610 fq = pke_pc_fifo(me, i*2 + 1, & operand);
1611 vu_lower_opcode = *operand;
1612 vu_upper_opcode = *pke_pc_operand(me, i*2 + 2);
1613
1614 /* write data into VU memory */
1615 /* lower (scalar) opcode comes in first word ; macro performs H2T! */
1616 PKE_MEM_WRITE(me, vu_addr,
1617 & vu_lower_opcode,
1618 4);
1619 /* upper (vector) opcode comes in second word ; H2T */
1620 ASSERT(sizeof(unsigned_4) == 4);
1621 PKE_MEM_WRITE(me, vu_addr + 4,
1622 & vu_upper_opcode,
1623 4);
1624
1625 /* write tracking address in target byte-order */
1626 ASSERT(sizeof(unsigned_4) == 4);
1627 PKE_MEM_WRITE(me, vutrack_addr,
1628 & fq->source_address,
1629 4);
1630 } /* VU xfer loop */
1631
1632 /* check NUM */
1633 ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0);
1634
1635 /* done */
1636 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1637 pke_pc_advance(me, 1 + num*2);
1638 }
1639 } /* if FIFO full enough */
1640 else
1641 {
1642 /* need to wait for another word */
1643 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1644 /* retry this instruction next clock */
1645 }
1646 }
1647
1648
1649 void
1650 pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
1651 {
1652 /* check that FIFO has a few more words for DIRECT operand */
1653 unsigned_4* last_direct_word;
1654 int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
1655
1656 /* assert 128-bit alignment of DIRECT operand */
1657 if(me->qw_pc != 3)
1658 return pke_code_error(me, pkecode);
1659
1660 /* handle interrupts */
1661 if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
1662 {
1663 PKE_REG_MASK_SET(me, STAT, PIS, 1);
1664 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
1665 return;
1666 }
1667
1668 /* map zero to max+1 */
1669 if(imm==0) imm=0x10000;
1670
1671 last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit words */
1672 if(last_direct_word != NULL)
1673 {
1674 /* VU idle */
1675 int i;
1676 unsigned_16 fifo_data;
1677
1678 /* "transferring" operand */
1679 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
1680
1681 /* transfer GPUIF quadwords, one word per iteration */
1682 for(i=0; i<imm*4; i++)
1683 {
1684 unsigned_4* operand = pke_pc_operand(me, 1+i);
1685
1686 /* collect word into quadword */
1687 *A4_16(&fifo_data, 3 - (i % 4)) = *operand;
1688
1689 /* write to GPUIF FIFO only with full quadword */
1690 if(i % 4 == 3)
1691 {
1692 ASSERT(sizeof(fifo_data) == 16);
1693 PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR,
1694 & fifo_data,
1695 16);
1696 } /* write collected quadword */
1697
1698 } /* GPUIF xfer loop */
1699
1700 /* done */
1701 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
1702 pke_pc_advance(me, 1 + imm*4);
1703 } /* if FIFO full enough */
1704 else
1705 {
1706 /* need to wait for another word */
1707 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
1708 /* retry this instruction next clock */
1709 }
1710 }
1711
1712
1713 void
1714 pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
1715 {
1716 /* treat the same as DIRECTH */
1717 pke_code_direct(me, pkecode);
1718 }
1719
1720
/* Handle PKEcode UNPACK: expand packed vectors from the FIFO into VU
   data memory (VU*_MEM1), honoring the CYCLE (CL/WL), MASK, and MODE
   registers, and recording each quadword's source address in the VU
   tracking table.  Code is intricate and kept byte-identical here;
   comments only.  */
void
pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */
  short vl = BIT_MASK_GET(cmd, 0, 1);
  int m = BIT_MASK_GET(cmd, 4, 4);    /* mask-mode flag: use MASK register */
  short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */
  short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
  int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */
  int usn = BIT_MASK_GET(imm, 14, 14); /* unsigned: suppress sign-extension */

  int n, num_operands;
  unsigned_4* last_operand_word = NULL;

  /* handle interrupts */
  if(BIT_MASK_GET(pkecode, PKE_OPCODE_I_B, PKE_OPCODE_I_E))
    {
      PKE_REG_MASK_SET(me, STAT, PIS, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      return;
    }

  /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
  if(wl <= cl)
    n = num;
  else
    n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
  /* number of 32-bit FIFO words the packed data occupies */
  num_operands = ((32 >> vl) * (vn+1) * n)/32;

  /* confirm that FIFO has enough words in it */
  if(num_operands > 0)
    last_operand_word = pke_pc_operand(me, num_operands);
  if(last_operand_word != NULL || num_operands == 0)
    {
      address_word vu_addr_base, vutrack_addr_base;
      address_word vu_addr_max_size;
      int vector_num_out, vector_num_in;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* don't check whether VU is idle */

      /* compute VU address base; PKE0 has no TOPS double-buffering, so
         force r off for it */
      if(me->pke_number == 0)
	{
	  vu_addr_base = VU0_MEM1_WINDOW_START;
	  vu_addr_max_size = VU0_MEM1_SIZE;
	  vutrack_addr_base = VU0_MEM1_SRCADDR_START;
	  r = 0;
	}
      else
	{
	  vu_addr_base = VU1_MEM1_WINDOW_START;
	  vu_addr_max_size = VU1_MEM1_SIZE;
	  vutrack_addr_base = VU1_MEM1_SRCADDR_START;
	}

      /* set NUM (0 encodes the maximum of 0x100 vectors) */
      PKE_REG_MASK_SET(me, NUM, NUM, num == 0 ? 0x100 : num );

      /* transfer given number of vectors */
      vector_num_out = 0;  /* output vector number being processed */
      vector_num_in = 0;  /* argument vector number being processed */
      do
	{
	  quadword vu_old_data;
	  quadword vu_new_data;
	  quadword unpacked_data;
	  address_word vu_addr;
	  address_word vutrack_addr;
	  unsigned_4 source_addr = 0;
	  int i;
	  int next_num;

	  /* decrement NUM as each output vector is produced */
	  next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
	  PKE_REG_MASK_SET(me, NUM, NUM, next_num);

	  /* compute VU destination address, as bytes in R5900 memory */
	  if(cl >= wl)
	    {
	      /* map zero to max+1 */
	      int addrwl = (wl == 0) ? 0x0100 : wl;
	      vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) +
					     (r ? PKE_REG_MASK_GET(me, TOPS, TOPS) : 0) +
					     cl*(vector_num_out/addrwl) +
					     (vector_num_out%addrwl));
	    }
	  else
	    vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) +
					   (r ? PKE_REG_MASK_GET(me, TOPS, TOPS) : 0) +
					   vector_num_out);

	  /* wrap around instead of overflowing past VU data memory */
	  while(vu_addr >= vu_addr_base + vu_addr_max_size)
	    vu_addr -= vu_addr_max_size;

	  /* compute address of tracking table entry (one word per qword) */
	  vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;

	  /* read old VU data word at address; reverse words if needed */
	  {
	    unsigned_16 vu_old_badwords;
	    ASSERT(sizeof(vu_old_badwords) == 16);
	    PKE_MEM_READ(me, vu_addr,
			 &vu_old_badwords, 16);
	    vu_old_data[0] = * A4_16(& vu_old_badwords, 3);
	    vu_old_data[1] = * A4_16(& vu_old_badwords, 2);
	    vu_old_data[2] = * A4_16(& vu_old_badwords, 1);
	    vu_old_data[3] = * A4_16(& vu_old_badwords, 0);
	  }

	  /* For cyclic unpack, next operand quadword may come from instruction stream
	     or be zero. */
	  if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */
	     ((cl < wl) && ((vector_num_out % wl) >= cl))) /* wl != 0, set above */
	    {
	      /* clear operand - used only in a "indeterminate" state */
	      for(i = 0; i < 4; i++)
		unpacked_data[i] = 0;
	    }
	  else
	    {
	      /* compute packed vector dimensions */
	      int vectorbits, unitbits;

	      if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
		{
		  unitbits = (32 >> vl);
		  vectorbits = unitbits * (vn+1);
		}
	      else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
		{
		  unitbits = 5;
		  vectorbits = 16;
		}
	      else /* illegal unpack variant */
		{
		  /* treat as illegal instruction */
		  pke_code_error(me, pkecode);
		  return;
		}

	      /* loop over columns of the unpacked vector */
	      for(i=0; i<=vn; i++)
		{
		  unsigned_4 operand;

		  /* offset in bits in current operand word */
		  int bitoffset =
		    (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */

		  /* last unit of V4_5 is only one bit wide */
		  if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
		    unitbits = 1;

		  /* fetch bitfield operand; also reports its source address */
		  operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr);

		  /* selectively sign-extend; not for V4_5 1-bit value */
		  if(usn || unitbits == 1)
		    unpacked_data[i] = operand;
		  else
		    unpacked_data[i] = SEXT32(operand, unitbits-1);
		}

	      /* consumed a vector from the PKE instruction stream */
	      vector_num_in ++;
	    } /* unpack word from instruction operand */

	  /* compute replacement word */
	  if(m) /* use mask register? */
	    {
	      /* compute index into mask register for this word */
	      int addrwl = (wl == 0) ? 0x0100 : wl;
	      int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3);

	      for(i=0; i<4; i++) /* loop over columns */
		{
		  int mask_op = PKE_MASKREG_GET(me, mask_index, i);
		  unsigned_4* masked_value = NULL;
		  unsigned_4 zero = 0;

		  switch(mask_op)
		    {
		    case PKE_MASKREG_INPUT:
		      /* for vn == 0, all columns are copied from column 0 */
		      if(vn == 0)
			masked_value = & unpacked_data[0];
		      else if(i > vn)
			masked_value = & zero; /* arbitrary data: undefined in spec */
		      else
			masked_value = & unpacked_data[i];
		      break;

		    case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
		      masked_value = & me->regs[PKE_REG_R0 + i][0];
		      break;

		    case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
		      masked_value = & me->regs[PKE_REG_C0 + mask_index][0];
		      break;

		    case PKE_MASKREG_NOTHING:
		      /* "write inhibit" by re-copying old data */
		      masked_value = & vu_old_data[i];
		      break;

		    default:
		      ASSERT(0);
		      /* no other cases possible */
		    }

		  /* copy masked value for column */
		  vu_new_data[i] = *masked_value;
		} /* loop over columns */
	    } /* mask */
	  else
	    {
	      /* no mask - just copy over entire unpacked quadword */
	      memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
	    }

	  /* process STMOD register for accumulation operations */
	  switch(PKE_REG_MASK_GET(me, MODE, MDE))
	    {
	    case PKE_MODE_ADDROW: /* add row registers to output data */
	      for(i=0; i<4; i++)
		/* exploit R0..R3 contiguity */
		vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
	      break;

	    case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
	      for(i=0; i<4; i++)
		{
		  /* exploit R0..R3 contiguity */
		  vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
		  me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
		}
	      break;

	    case PKE_MODE_INPUT: /* pass data through */
	    default:
	      ;
	    }

	  /* write new VU data word at address; reverse words if needed */
	  {
	    unsigned_16 vu_new_badwords;
	    * A4_16(& vu_new_badwords, 3) = vu_new_data[0];
	    * A4_16(& vu_new_badwords, 2) = vu_new_data[1];
	    * A4_16(& vu_new_badwords, 1) = vu_new_data[2];
	    * A4_16(& vu_new_badwords, 0) = vu_new_data[3];
	    ASSERT(sizeof(vu_new_badwords) == 16);
	    PKE_MEM_WRITE(me, vu_addr,
			  &vu_new_badwords, 16);
	  }

	  /* write tracking address (source of the last bitfield fetched) */
	  ASSERT(sizeof(unsigned_4) == 4);
	  PKE_MEM_WRITE(me, vutrack_addr,
			& source_addr,
			4);

	  /* next vector please */
	  vector_num_out ++;
	} /* vector transfer loop */
      while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);

      /* done: PKEcode plus all packed operand words */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + num_operands);
    } /* PKE FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}
2005
2006
2007 void
2008 pke_code_error(struct pke_device* me, unsigned_4 pkecode)
2009 {
2010 /* set ER1 flag in STAT register */
2011 PKE_REG_MASK_SET(me, STAT, ER1, 1);
2012 /* advance over faulty word */
2013 PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
2014 pke_pc_advance(me, 1);
2015 }
This page took 0.070659 seconds and 5 git commands to generate.