/* Copyright (C) 1998, Cygnus Solutions */


/* Debugging PKE? */
#define PKE_DEBUG


#include <stdlib.h>
#include <stdio.h>   /* fopen, fprintf, perror, setvbuf */
#include <string.h>  /* memcpy, memset */
#include "sky-pke.h"
#include "sky-dma.h"
#include "sim-bits.h"
#include "sim-assert.h"
#include "sky-vu0.h"
#include "sky-vu1.h"
#include "sky-gpuif.h"


/* Imported functions */

void device_error (device *me, char* message);  /* device.c */


/* Internal function declarations */

static int pke_io_read_buffer(device*, void*, int, address_word,
                              unsigned, sim_cpu*, sim_cia);
static int pke_io_write_buffer(device*, const void*, int, address_word,
                               unsigned, sim_cpu*, sim_cia);
static void pke_issue(SIM_DESC, struct pke_device*);
static void pke_pc_advance(struct pke_device*, int num_words);
static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
static unsigned_4 pke_pc_operand_bits(struct pke_device*, int bit_offset,
                                      int bit_width, unsigned_4* sourceaddr);
static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
                                         unsigned_4** operand);
static void pke_attach(SIM_DESC sd, struct pke_device* me);
enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
static void pke_flip_dbf(struct pke_device* me);

/* PKEcode handlers */
static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);


/* Static data */

struct pke_device pke0_device =
{
  { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  0, 0,              /* ID, flags */
  {},                /* regs */
  {}, 0,             /* FIFO write buffer */
  NULL, 0, 0, NULL,  /* FIFO */
  0, 0               /* pc */
};


struct pke_device pke1_device =
{
  { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  1, 0,              /* ID, flags */
  {},                /* regs */
  {}, 0,             /* FIFO write buffer */
  NULL, 0, 0, NULL,  /* FIFO */
  0, 0               /* pc */
};



/* External functions */


/* Attach PKE addresses to main memory */

void
pke0_attach(SIM_DESC sd)
{
  pke_attach(sd, & pke0_device);
}

void
pke1_attach(SIM_DESC sd)
{
  pke_attach(sd, & pke1_device);
}



/* Issue a PKE instruction if possible */

void
pke0_issue(SIM_DESC sd)
{
  pke_issue(sd, & pke0_device);
}

void
pke1_issue(SIM_DESC sd)
{
  pke_issue(sd, & pke1_device);
}


/* Internal functions */


/* Attach PKE memory regions to simulator */

void
pke_attach(SIM_DESC sd, struct pke_device* me)
{
  /* register file */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
                   PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* FIFO port */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
                   sizeof(quadword) /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* VU MEM0 tracking table */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START),
                   ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2,
                   0 /*modulo*/,
                   NULL,
                   NULL /*buffer*/);

  /* VU MEM1 tracking table */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START),
                   ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4,
                   0 /*modulo*/,
                   NULL,
                   NULL /*buffer*/);


  /* attach to trace file if appropriate */
  {
    char trace_envvar[80];
    char* trace_filename = NULL;
    sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number);
    trace_filename = getenv(trace_envvar);
    if(trace_filename != NULL)
      {
        me->fifo_trace_file = fopen(trace_filename, "w");
        if(me->fifo_trace_file == NULL)
          perror("VIF FIFO trace error on fopen");
        else
          setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0);
      }
  }
}
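
/* Usage note (illustrative): if the environment variable
   VIF0_TRACE_FILE (VIF1_TRACE_FILE for PKE1) is set to a writable
   path before the simulator starts, e.g.

       VIF0_TRACE_FILE=/tmp/vif0.trace

   then every FIFO quadword consumed by this engine is appended to
   that file, one line per quadword, in the format emitted by
   pke_pc_advance() below.  */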


/* Handle a PKE read; return no. of bytes read */

int
pke_io_read_buffer(device *me_,
                   void *dest,
                   int space,
                   address_word addr,
                   unsigned nr_bytes,
                   sim_cpu *cpu,
                   sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;
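
  /* Illustration (assuming ADDR_TRUNC_QW truncates to a 16-byte
     boundary): a 4-byte access at quadword offset 0xE touches bytes
     0xE..0x11, so `low' and `high' truncate to different quadwords
     and the access is rejected, while the same access at offset 0xC
     stays within one quadword and proceeds.  */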

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);  /* find byte-offset inside register bank */
      int readable = 1;
      quadword result;

      /* clear result */
      result[0] = result[1] = result[2] = result[3] = 0;

      /* handle reads to individual registers; clear `readable' on error */
      switch(reg_num)
        {
          /* handle common case of register reading, side-effect free */
          /* PKE1-only registers */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            readable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
        case PKE_REG_ERR:
        case PKE_REG_MARK:
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          result[0] = me->regs[reg_num][0];
          break;

          /* handle common case of write-only registers */
        case PKE_REG_FBRST:
          readable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform transfer & return */
      if(readable)
        {
          /* copy the bits */
          memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
          /* okay */
        }
      else
        {
          /* return zero bits */
          memset(dest, 0, nr_bytes);
        }

      return nr_bytes;
      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */

      /* FIFO is not readable: return a word of zeroes */
      memset(dest, 0, nr_bytes);
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}


/* Handle a PKE write; return no. of bytes written */

int
pke_io_write_buffer(device *me_,
                    const void *src,
                    int space,
                    address_word addr,
                    unsigned nr_bytes,
                    sim_cpu *cpu,
                    sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);  /* find byte-offset inside register bank */
      int writeable = 1;
      quadword input;

      /* clear input */
      input[0] = input[1] = input[2] = input[3] = 0;

      /* write user-given bytes into input */
      memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);

      /* handle writes to individual registers; clear `writeable' on error */
      switch(reg_num)
        {
        case PKE_REG_FBRST:
          /* Order these tests from least to most overriding, in case
             multiple bits are set. */
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E))
            {
              /* clear a bunch of status bits */
              PKE_REG_MASK_SET(me, STAT, PSS, 0);
              PKE_REG_MASK_SET(me, STAT, PFS, 0);
              PKE_REG_MASK_SET(me, STAT, PIS, 0);
              PKE_REG_MASK_SET(me, STAT, INT, 0);
              PKE_REG_MASK_SET(me, STAT, ER0, 0);
              PKE_REG_MASK_SET(me, STAT, ER1, 0);
              me->flags &= ~PKE_FLAG_PENDING_PSS;
              /* will allow resumption of possible stalled instruction */
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E))
            {
              me->flags |= PKE_FLAG_PENDING_PSS;
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E))
            {
              PKE_REG_MASK_SET(me, STAT, PFS, 1);
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E))
            {
              /* clear FIFO by skipping to word after PC: also
                 prevents re-execution attempt of possible stalled
                 instruction */
              me->fifo_num_elements = me->fifo_pc;
              /* clear registers, flag, other state */
              memset(me->regs, 0, sizeof(me->regs));
              me->fifo_qw_done = 0;
              me->flags = 0;
              me->qw_pc = 0;
            }
          break;

        case PKE_REG_ERR:
          /* copy bottom three bits */
          BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
          break;

        case PKE_REG_MARK:
          /* copy bottom sixteen bits */
          PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
          /* reset MRK bit in STAT */
          PKE_REG_MASK_SET(me, STAT, MRK, 0);
          break;

          /* handle common case of read-only registers */
          /* PKE1-only registers - not really necessary to handle separately */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            writeable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
          /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          writeable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform return */
      if(! writeable)
        {
          ; /* error */
        }

      return nr_bytes;

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */
      struct fifo_quadword* fqw;
      int fifo_byte = ADDR_OFFSET_QW(addr);  /* find byte-offset inside fifo quadword */
      unsigned_4 dma_tag_present = 0;
      int i;

      /* collect potentially-partial quadword in write buffer */
      memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
      /* mark bytes written */
      for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
        BIT_MASK_SET(me->fifo_qw_done, i, i, 1);
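
      /* Bookkeeping sketch: bit i of fifo_qw_done records that byte i
         of the 16-byte quadword has arrived; e.g. two 8-byte writes at
         offsets 0 and 8 set bits 0-7 and then 8-15, at which point the
         completeness test below sees all sixteen bits set.  */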

      /* return if quadword not quite written yet */
      if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) !=
         BIT_MASK_BTW(0, sizeof(quadword)-1))
        return nr_bytes;

      /* all done - process quadword after clearing flag */
      BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0);

      /* ensure FIFO has enough elements */
      if(me->fifo_num_elements == me->fifo_buffer_size)
        {
          /* time to grow */
          int new_fifo_buffer_size = me->fifo_buffer_size + 20;
          void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(struct fifo_quadword));

          if(ptr == NULL)
            {
              /* oops, cannot enlarge FIFO any more */
              device_error(me_, "Cannot enlarge FIFO buffer\n");
              return 0;
            }

          me->fifo = ptr;
          me->fifo_buffer_size = new_fifo_buffer_size;
        }

      /* add new quadword at end of FIFO */
      fqw = & me->fifo[me->fifo_num_elements];
      fqw->word_class[0] = fqw->word_class[1] =
        fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
      memcpy((void*) fqw->data, me->fifo_qw_in_progress, sizeof(quadword));
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
                   & fqw->source_address, /* target endian */
                   4);
      fqw->source_address = T2H_4(fqw->source_address);
      PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
                   & dma_tag_present,
                   4);

      if(dma_tag_present)
        {
          /* lower two words are DMA tags */
          fqw->word_class[0] = fqw->word_class[1] = wc_dma;
        }

      me->fifo_num_elements++;

      /* set FQC to "1" as FIFO is now not empty */
      PKE_REG_MASK_SET(me, STAT, FQC, 1);

      /* okay */
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}


/* Issue & swallow next PKE opcode if possible/available */

void
pke_issue(SIM_DESC sd, struct pke_device* me)
{
  struct fifo_quadword* fqw;
  unsigned_4 fw;
  unsigned_4 cmd, intr, num;
  unsigned_4 imm;

  /* 1 -- test go / no-go for PKE execution */

  /* switch on STAT:PSS if PSS-pending and in idle state */
  if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) &&
     (me->flags & PKE_FLAG_PENDING_PSS) != 0)
    {
      me->flags &= ~PKE_FLAG_PENDING_PSS;
      PKE_REG_MASK_SET(me, STAT, PSS, 1);
    }

  /* check for stall/halt control bits */
  if(PKE_REG_MASK_GET(me, STAT, PFS) ||
     PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */
     /* PEW bit not a reason to keep stalling - it's re-checked below */
     /* PGW bit not a reason to keep stalling - it's re-checked below */
     /* maskable stall controls: ER0, ER1, PIS */
     (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
     (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
     (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
    {
      /* try again next cycle; no state change */
      return;
    }

  /* confirm availability of new quadword of PKE instructions */
  if(me->fifo_num_elements <= me->fifo_pc)
    return;


  /* 2 -- fetch PKE instruction */

  /* skip over DMA tag, if present */
  pke_pc_advance(me, 0);

  /* "fetch" instruction quadword and word */
  fqw = & me->fifo[me->fifo_pc];
  fw = fqw->data[me->qw_pc];

  /* store word in PKECODE register */
  me->regs[PKE_REG_CODE][0] = fw;


  /* 3 -- decode PKE instruction */

  /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
     so op-code is in top byte. */
  intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
  cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
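
  /* Worked example (illustrative, assuming the PKE_OPCODE_* positions
     match the layout described above): fw = 0x01000002 decodes as
     intr=0 (bit 31), cmd=0x01 (bits 30:24), num=0x00 (bits 23:16),
     imm=0x0002 (bits 15:0).  */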

  if(intr)
    {
      /* set INT flag in STAT register */
      PKE_REG_MASK_SET(me, STAT, INT, 1);
      /* XXX: send interrupt to 5900? */
    }

  /* decoding */
  if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE)
    PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);

  /* decode & execute */
  if(IS_PKE_CMD(cmd, PKENOP))
    pke_code_nop(me, fw);
  else if(IS_PKE_CMD(cmd, STCYCL))
    pke_code_stcycl(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
    pke_code_offset(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
    pke_code_base(me, fw);
  else if(IS_PKE_CMD(cmd, ITOP))
    pke_code_itop(me, fw);
  else if(IS_PKE_CMD(cmd, STMOD))
    pke_code_stmod(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
    pke_code_mskpath3(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMARK))
    pke_code_pkemark(me, fw);
  else if(IS_PKE_CMD(cmd, FLUSHE))
    pke_code_flushe(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
    pke_code_flush(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
    pke_code_flusha(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCAL))
    pke_code_pkemscal(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCNT))
    pke_code_pkemscnt(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
    pke_code_pkemscalf(me, fw);
  else if(IS_PKE_CMD(cmd, STMASK))
    pke_code_stmask(me, fw);
  else if(IS_PKE_CMD(cmd, STROW))
    pke_code_strow(me, fw);
  else if(IS_PKE_CMD(cmd, STCOL))
    pke_code_stcol(me, fw);
  else if(IS_PKE_CMD(cmd, MPG))
    pke_code_mpg(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECT))
    pke_code_direct(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECTHL))
    pke_code_directhl(me, fw);
  else if(IS_PKE_CMD(cmd, UNPACK))
    pke_code_unpack(me, fw);
  /* ... no other commands ... */
  else
    pke_code_error(me, fw);
}


/* advance the PC by given number of data words; update STAT/FQC
   field; assume FIFO is filled enough; classify passed-over words;
   write FIFO trace line */

void
pke_pc_advance(struct pke_device* me, int num_words)
{
  int num = num_words;
  struct fifo_quadword* fq = NULL;
  ASSERT(num_words >= 0);

  /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */

  while(1)
    {
      fq = & me->fifo[me->fifo_pc];

      /* skip over DMA tag words if present in word 0 or 1 */
      if(fq->word_class[me->qw_pc] == wc_dma)
        {
          /* skip by going around loop an extra time */
          num ++;
        }

      /* nothing left to skip / no DMA tag here */
      if(num == 0)
        break;

      /* one word skipped */
      num --;

      /* point to next word */
      me->qw_pc ++;
      if(me->qw_pc == 4)
        {
          me->qw_pc = 0;
          me->fifo_pc ++;

          /* trace the consumption of the FIFO quadword we just skipped over */
          /* fq still points to it */
          if(me->fifo_trace_file != NULL)
            {
              /* assert complete classification */
              ASSERT(fq->word_class[3] != wc_unknown);
              ASSERT(fq->word_class[2] != wc_unknown);
              ASSERT(fq->word_class[1] != wc_unknown);
              ASSERT(fq->word_class[0] != wc_unknown);

              /* print trace record */
              fprintf(me->fifo_trace_file,
                      "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
                      (me->pke_number == 0 ? 0 : 1),
                      (unsigned) fq->data[3], (unsigned) fq->data[2],
                      (unsigned) fq->data[1], (unsigned) fq->data[0],
                      (unsigned) fq->source_address,
                      fq->word_class[3], fq->word_class[2],
                      fq->word_class[1], fq->word_class[0]);
            }

          /* XXX: zap old entries in FIFO */
        } /* next quadword */
    }

  /* clear FQC if FIFO is now empty */
  if(me->fifo_num_elements == me->fifo_pc)
    {
      PKE_REG_MASK_SET(me, STAT, FQC, 0);
    }
  else /* annotate the word where the PC lands as a PKEcode */
    {
      fq = & me->fifo[me->fifo_pc];
      ASSERT(fq->word_class[me->qw_pc] == wc_pkecode ||
             fq->word_class[me->qw_pc] == wc_unknown);
      fq->word_class[me->qw_pc] = wc_pkecode;
    }
}


/* Return pointer to FIFO quadword containing given operand# in FIFO.
   `operand_num' starts at 1.  Return pointer to operand word in last
   argument, if non-NULL.  If FIFO is not full enough, return 0.
   Signal an ER0 indication upon skipping a DMA tag. */

struct fifo_quadword*
pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
{
  int num = operand_num;
  int new_qw_pc, new_fifo_pc;
  struct fifo_quadword* fq = NULL;

  ASSERT(num > 0);

  /* snapshot current pointers */
  new_fifo_pc = me->fifo_pc;
  new_qw_pc = me->qw_pc;

  /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */

  do
    {
      /* one word skipped */
      num --;

      /* point to next word */
      new_qw_pc ++;
      if(new_qw_pc == 4)
        {
          new_qw_pc = 0;
          new_fifo_pc ++;
        }

      /* check for FIFO underflow */
      if(me->fifo_num_elements == new_fifo_pc)
        {
          fq = NULL;
          break;
        }

      /* skip over DMA tag words if present in word 0 or 1 */
      fq = & me->fifo[new_fifo_pc];
      if(fq->word_class[new_qw_pc] == wc_dma)
        {
          /* mismatch error! */
          PKE_REG_MASK_SET(me, STAT, ER0, 1);
          /* skip by going around loop an extra time */
          num ++;
        }
    }
  while(num > 0);

  /* return pointer to operand word itself */
  if(fq != NULL)
    {
      *operand = & fq->data[new_qw_pc];

      /* annotate the word where the pseudo-PC lands as a PKE operand */
      ASSERT(fq->word_class[new_qw_pc] == wc_pkedata ||
             fq->word_class[new_qw_pc] == wc_unknown);
      fq->word_class[new_qw_pc] = wc_pkedata;
    }

  return fq;
}


/* Return pointer to given operand# in FIFO.  `operand_num' starts at 1.
   If FIFO is not full enough, return 0.  Skip over DMA tags, but mark
   them as an error (ER0). */

unsigned_4*
pke_pc_operand(struct pke_device* me, int operand_num)
{
  unsigned_4* operand = NULL;
  struct fifo_quadword* fifo_operand;

  fifo_operand = pke_pc_fifo(me, operand_num, & operand);

  if(fifo_operand == NULL)
    ASSERT(operand == NULL); /* pke_pc_fifo() ought to leave it untouched */

  return operand;
}


/* Return a bit-field extract of given operand# in FIFO, and its
   source-addr.  `bit_offset' starts at 0, referring to LSB after PKE
   instruction word.  Width must be >0, <=32.  Assume FIFO is full
   enough.  Skip over DMA tags, but mark them as an error (ER0). */

unsigned_4
pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr)
{
  unsigned_4* word = NULL;
  unsigned_4 value;
  struct fifo_quadword* fifo_operand;
  int wordnumber, bitnumber;

  wordnumber = bit_offset/32;
  bitnumber = bit_offset%32;

  /* find operand word with bitfield */
  fifo_operand = pke_pc_fifo(me, wordnumber + 1, &word);
  ASSERT(word != NULL);

  /* extract bitfield from word */
  value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1);

  /* extract source addr from fifo word */
  *source_addr = fifo_operand->source_address;

  return value;
}
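
/* Worked example (illustrative): bit_offset=70, bit_width=5 maps to
   wordnumber=2, bitnumber=6, so pke_pc_fifo() is asked for operand #3
   and bits 6..10 of that word are extracted.  */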


/* Check for stall conditions on the indicated devices (path* checks
   apply only to PKE1).  Do not change status.  Return 0 iff there is
   no stall.  */
int
pke_check_stall(struct pke_device* me, enum pke_check_target what)
{
  int any_stall = 0;
  unsigned_4 cop2_stat, gpuif_stat;

  /* read status words */
  ASSERT(sizeof(unsigned_4) == 4);
  PKE_MEM_READ(me, (GIF_REG_STAT),
               & gpuif_stat,
               4);
  PKE_MEM_READ(me, (COP2_REG_STAT_ADDR),
               & cop2_stat,
               4);

  /* perform checks */
  if(what == chk_vu)
    {
      if(me->pke_number == 0)
        any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E);
      else /* if(me->pke_number == 1) */
        any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E);
    }
  else if(what == chk_path1) /* VU -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1)
        any_stall = 1;
    }
  else if(what == chk_path2) /* PKE -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2)
        any_stall = 1;
    }
  else if(what == chk_path3) /* DMA -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3)
        any_stall = 1;
    }
  else
    {
      /* invalid what */
      ASSERT(0);
    }

  /* any stall reasons? */
  return any_stall;
}


/* flip the DBF bit; recompute TOPS, ITOP & TOP */
void
pke_flip_dbf(struct pke_device* me)
{
  /* compute new ITOP and TOP */
  PKE_REG_MASK_SET(me, ITOP, ITOP,
                   PKE_REG_MASK_GET(me, ITOPS, ITOPS));
  PKE_REG_MASK_SET(me, TOP, TOP,
                   PKE_REG_MASK_GET(me, TOPS, TOPS));
  /* flip DBF */
  PKE_REG_MASK_SET(me, DBF, DF,
                   PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
  PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
  /* compute new TOPS */
  PKE_REG_MASK_SET(me, TOPS, TOPS,
                   (PKE_REG_MASK_GET(me, BASE, BASE) +
                    (PKE_REG_MASK_GET(me, DBF, DF) *
                     PKE_REG_MASK_GET(me, OFST, OFFSET))));
}
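
/* Worked example (illustrative): with BASE=0x100 and OFST=0x80, the
   first flip sets DF=1 and TOPS = 0x100 + 1*0x80 = 0x180; the next
   flip sets DF=0 and TOPS=0x100, so successive PKEMSCAL/PKEMSCNT
   calls ping-pong between the two double-buffer halves.  */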


/* PKEcode handler functions -- responsible for checking and
   confirming old stall conditions, executing pkecode, updating PC and
   status registers -- may assume being run on correct PKE unit */

void
pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
{
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy immediate value into CYCLE reg */
  PKE_REG_MASK_SET(me, CYCLE, WL, BIT_MASK_GET(imm, 8, 15));
  PKE_REG_MASK_SET(me, CYCLE, CL, BIT_MASK_GET(imm, 0, 7));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to OFFSET field */
  PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
  /* clear DBF bit */
  PKE_REG_MASK_SET(me, DBF, DF, 0);
  /* clear other DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, 0);
  /* set TOPS = BASE */
  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_base(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to BASE field */
  PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
  /* clear DBF bit */
  PKE_REG_MASK_SET(me, DBF, DF, 0);
  /* clear other DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, 0);
  /* set TOPS = BASE */
  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to ITOPS field */
  PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 2 bits to MODE register */
  PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 1));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  unsigned_4 gif_mode;

  /* set appropriate bit */
  if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0)
    gif_mode = GIF_REG_MODE_M3R_MASK;
  else
    gif_mode = 0;

  /* write register; patrickm code will look at M3R bit only */
  PKE_MEM_WRITE(me, GIF_REG_MODE, & gif_mode, 4);

  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 16 bits to MARK register */
  PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
  /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
  PKE_REG_MASK_SET(me, STAT, MRK, 1);
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);


  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  /* go or no go */
  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      /* all idle */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);


  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2) ||
     pke_check_stall(me, chk_path3))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      /* all idle */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;
      int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* compute new PC for VU */
      vu_pc = BIT_MASK_GET(imm, 0, 15);

      /* write new PC; callback function gets VU running */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                    & vu_pc,
                    4);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}



void
pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;

      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* read old PC */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_READ(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                   & vu_pc,
                   4);

      /* rewrite new PC; callback function gets VU running */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                    & vu_pc,
                    4);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);


  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2) ||
     pke_check_stall(me, chk_path3))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  /* go or no go */
  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;
      int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* compute new PC for VU */
      vu_pc = BIT_MASK_GET(imm, 0, 15);

      /* rewrite new PC; callback function gets VU running */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                    & vu_pc,
                    4);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has one more word for STMASK operand */
  unsigned_4* mask;

  mask = pke_pc_operand(me, 1);
  if(mask != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* fill the register */
      PKE_REG_MASK_SET(me, MASK, MASK, *mask);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 2);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has four more words for STROW operand */
  unsigned_4* last_op;

  last_op = pke_pc_operand(me, 4);
  if(last_op != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* copy ROW registers: must all exist if 4th operand exists */
      me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
      me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
      me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
      me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 5);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has four more words for STCOL operand */
  unsigned_4* last_op;

  last_op = pke_pc_operand(me, 4);
  if(last_op != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* copy COL registers: must all exist if 4th operand exists */
      me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
      me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
      me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
      me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 5);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
{
  unsigned_4* last_mpg_word;
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* assert 64-bit alignment of MPG operand */
  if(me->qw_pc != 3 && me->qw_pc != 1)
    return pke_code_error(me, pkecode);
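
  /* Rationale: the PKEcode occupies one 32-bit word, so with the
     PKEcode at word index 1 or 3 of its quadword the first operand
     word lands at index 2 or 0, i.e. on a 64-bit boundary as the VU
     instruction word-pairs require.  */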

  /* map zero to max+1 */
  if(num==0) num=0x100;

  /* check that FIFO has a few more words for MPG operand */
  last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
  if(last_mpg_word != NULL)
    {
      /* perform implied FLUSHE */
      if(pke_check_stall(me, chk_vu))
        {
          /* VU busy */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
          /* retry this instruction next clock */
        }
      else
        {
          /* VU idle */
          int i;

          /* "transferring" operand */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

          /* set NUM */
          PKE_REG_MASK_SET(me, NUM, NUM, num);

          /* transfer VU instructions, one word-pair per iteration */
          for(i=0; i<num; i++)
            {
              address_word vu_addr_base, vu_addr;
              address_word vutrack_addr_base, vutrack_addr;
              address_word vu_addr_max_size;
              unsigned_4 vu_lower_opcode, vu_upper_opcode;
              unsigned_4* operand;
              unsigned_4 source_addr;
              struct fifo_quadword* fq;
              int next_num;

              /* decrement NUM */
              next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
              PKE_REG_MASK_SET(me, NUM, NUM, next_num);

              /* imm: in 64-bit units for MPG instruction */
              /* VU*_MEM0 : instruction memory */
              vu_addr_base = (me->pke_number == 0) ?
                VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START;
              vu_addr_max_size = (me->pke_number == 0) ?
                VU0_MEM0_SIZE : VU1_MEM0_SIZE;
              vutrack_addr_base = (me->pke_number == 0) ?
                VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;

              /* compute VU address for this word-pair */
              vu_addr = vu_addr_base + (imm + i) * 8;
              /* check for vu_addr overflow */
              while(vu_addr >= vu_addr_base + vu_addr_max_size)
                vu_addr -= vu_addr_max_size;

              /* compute VU tracking address */
              vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2;
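
              /* The tracking table holds one 4-byte source address per
                 8-byte VU instruction pair, hence the division by 2 in
                 the offset computation above (matching the MEM0 table
                 attached at VU*_MEM0_SIZE/2 in pke_attach).  */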

              /* Fetch operand words; assume they are already little-endian for VU imem */
              fq = pke_pc_fifo(me, i*2 + 1, & operand);
              vu_lower_opcode = *operand;
              vu_upper_opcode = *pke_pc_operand(me, i*2 + 2);

              /* write data into VU memory */
              /* lower (scalar) opcode comes in first word */
              PKE_MEM_WRITE(me, vu_addr,
                            & vu_lower_opcode,
                            4);
              /* upper (vector) opcode comes in second word */
              ASSERT(sizeof(unsigned_4) == 4);
              PKE_MEM_WRITE(me, vu_addr + 4,
                            & vu_upper_opcode,
                            4);

              /* write tracking address in target byte-order */
              source_addr = H2T_4(fq->source_address);
              ASSERT(sizeof(unsigned_4) == 4);
              PKE_MEM_WRITE(me, vutrack_addr,
                            & source_addr,
                            4);
            } /* VU xfer loop */

          /* check NUM */
          ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0);

          /* done */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
          pke_pc_advance(me, 1 + num*2);
        }
    } /* if FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has a few more words for DIRECT operand */
  unsigned_4* last_direct_word;
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* assert 128-bit alignment of DIRECT operand */
  if(me->qw_pc != 3)
    return pke_code_error(me, pkecode);

  /* map zero to max+1 */
  if(imm==0) imm=0x10000;

  last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit words */
  if(last_direct_word != NULL)
    {
      /* VU idle */
      int i;
      quadword fifo_data;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* transfer GPUIF quadwords, one word per iteration */
      for(i=0; i<imm*4; i++)
        {
          unsigned_4* operand = pke_pc_operand(me, 1+i);

          /* collect word into quadword */
          fifo_data[i % 4] = *operand;

          /* write to GPUIF FIFO only with full quadword */
          if(i % 4 == 3)
            {
              ASSERT(sizeof(fifo_data) == 16);
              PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR,
                            fifo_data,
                            16);
            } /* write collected quadword */

        } /* GPUIF xfer loop */

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + imm*4);
    } /* if FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
{
  /* treat the same as DIRECT */
  pke_code_direct(me, pkecode);
}


void
pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */
  short vl = BIT_MASK_GET(cmd, 0, 1);
  int m = BIT_MASK_GET(cmd, 4, 4);
  short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */
  short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
  int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */
  int usn = BIT_MASK_GET(imm, 14, 14);

  int n, num_operands;
  unsigned_4* last_operand_word = NULL;

  /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
  if(wl <= cl)
    n = num;
  else
    n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
  num_operands = ((32 >> vl) * (vn+1) * n)/32;
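
  /* Worked example (illustrative): for a V3_16 unpack (vn=2, vl=1)
     with num=4 and wl <= cl, n = num = 4, so num_operands =
     ((32>>1) * 3 * 4) / 32 = 6 FIFO words -- 48 bits per vector
     times four vectors.  */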

  /* confirm that FIFO has enough words in it */
  if(num_operands > 0)
    last_operand_word = pke_pc_operand(me, num_operands);
  if(last_operand_word != NULL || num_operands == 0)
    {
      address_word vu_addr_base, vutrack_addr_base;
      address_word vu_addr_max_size;
      int vector_num_out, vector_num_in;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* don't check whether VU is idle */

      /* compute VU address base */
      if(me->pke_number == 0)
        {
          vu_addr_base = VU0_MEM1_WINDOW_START + 16 * BIT_MASK_GET(imm, 0, 9);
          vu_addr_max_size = VU0_MEM1_SIZE;
          vutrack_addr_base = VU0_MEM1_SRCADDR_START + 4 * BIT_MASK_GET(imm, 0, 9);
        }
      else
        {
          vu_addr_base = VU1_MEM1_WINDOW_START + 16 * BIT_MASK_GET(imm, 0, 9);
          vu_addr_max_size = VU1_MEM1_SIZE;
          vutrack_addr_base = VU1_MEM1_SRCADDR_START + 4 * BIT_MASK_GET(imm, 0, 9);
          if(r) /* double-buffering */
            {
              vu_addr_base += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS);
              vutrack_addr_base += 4 * PKE_REG_MASK_GET(me, TOPS, TOPS);
            }
        }


      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, num == 0 ? 0x100 : num );

      /* transfer given number of vectors */
      vector_num_out = 0;  /* output vector number being processed */
      vector_num_in = 0;  /* argument vector number being processed */
      do
        {
          quadword vu_old_data;
          quadword vu_new_data;
          quadword unpacked_data;
          address_word vu_addr;
          address_word vutrack_addr;
          unsigned_4 source_addr = 0;
          int i;
          int next_num;

          /* decrement NUM */
          next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
          PKE_REG_MASK_SET(me, NUM, NUM, next_num);

          /* compute VU destination address, as bytes in R5900 memory */
          if(cl >= wl)
            {
              /* map zero to max+1 */
              int addrwl = (wl == 0) ? 0x0100 : wl;
              vu_addr = vu_addr_base + 16*(cl*(vector_num_out/addrwl) + (vector_num_out%addrwl));
            }
          else
            vu_addr = vu_addr_base + 16*vector_num_out;

          /* check for vu_addr overflow */
          while(vu_addr >= vu_addr_base + vu_addr_max_size)
            vu_addr -= vu_addr_max_size;

          /* compute address of tracking table entry */
          vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;
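
          /* One 4-byte source address is kept per 16-byte VU vector,
             hence the division by 4 in the offset computation above
             (matching the MEM1 table attached at VU*_MEM1_SIZE/4 in
             pke_attach).  */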

          /* read old VU data word at address */
          ASSERT(sizeof(vu_old_data) == 16);
          PKE_MEM_READ(me, vu_addr,
                       vu_old_data,
                       16);

          /* yank memory out of little-endian order */
          for(i=0; i<4; i++)
            vu_old_data[i] = LE2H_4(vu_old_data[i]);

          /* For cyclic unpack, next operand quadword may come from instruction stream
             or be zero. */
          if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */
             ((cl < wl) && ((vector_num_out % wl) >= cl))) /* wl != 0, set above */
            {
              /* clear operand - used only in an "indeterminate" state */
              for(i = 0; i < 4; i++)
                unpacked_data[i] = 0;
            }
          else
            {
              /* compute packed vector dimensions */
              int vectorbits, unitbits;

              if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
                {
                  unitbits = (32 >> vl);
                  vectorbits = unitbits * (vn+1);
                }
              else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
                {
                  unitbits = 5;
                  vectorbits = 16;
                }
              else /* illegal unpack variant */
                {
                  /* treat as illegal instruction */
                  pke_code_error(me, pkecode);
                  return;
                }

              /* loop over columns */
              for(i=0; i<=vn; i++)
                {
                  unsigned_4 operand;

                  /* offset in bits in current operand word */
                  int bitoffset =
                    (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */
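
                  /* E.g. (illustrative) for V4_5 the third input
                     vector's second column starts at
                     bitoffset = 2*16 + 1*5 = 37.  */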

                  /* last unit of V4_5 is only one bit wide */
                  if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
                    unitbits = 1;

                  /* fetch bitfield operand */
                  operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr);

                  /* selectively sign-extend; not for V4_5 1-bit value */
                  if(usn || unitbits == 1)
                    unpacked_data[i] = operand;
                  else
                    unpacked_data[i] = SEXT32(operand, unitbits-1);
                }

              /* consumed a vector from the PKE instruction stream */
              vector_num_in ++;
            } /* unpack word from instruction operand */

          /* compute replacement word */
          if(m) /* use mask register? */
            {
              /* compute index into mask register for this word */
              int addrwl = (wl == 0) ? 0x0100 : wl;
              int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3);

              for(i=0; i<4; i++) /* loop over columns */
                {
                  int mask_op = PKE_MASKREG_GET(me, mask_index, i);
                  unsigned_4* masked_value = NULL;
                  unsigned_4 zero = 0;

                  switch(mask_op)
                    {
                    case PKE_MASKREG_INPUT:
                      /* for vn == 0, all columns are copied from column 0 */
                      if(vn == 0)
                        masked_value = & unpacked_data[0];
                      else if(i > vn)
                        masked_value = & zero; /* arbitrary data: undefined in spec */
                      else
                        masked_value = & unpacked_data[i];
                      break;

                    case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
                      masked_value = & me->regs[PKE_REG_R0 + i][0];
                      break;

                    case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
                      masked_value = & me->regs[PKE_REG_C0 + mask_index][0];
                      break;

                    case PKE_MASKREG_NOTHING:
                      /* "write inhibit" by re-copying old data */
                      masked_value = & vu_old_data[i];
                      break;

                    default:
                      ASSERT(0);
                      /* no other cases possible */
                    }

                  /* copy masked value for column */
                  vu_new_data[i] = *masked_value;
                } /* loop over columns */
            } /* mask */
          else
            {
              /* no mask - just copy over entire unpacked quadword */
              memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
            }

          /* process STMOD register for accumulation operations */
          switch(PKE_REG_MASK_GET(me, MODE, MDE))
            {
            case PKE_MODE_ADDROW: /* add row registers to output data */
              for(i=0; i<4; i++)
                /* exploit R0..R3 contiguity */
                vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
              break;

            case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
              for(i=0; i<4; i++)
                {
                  /* exploit R0..R3 contiguity */
                  vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
                  me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
                }
              break;

            case PKE_MODE_INPUT: /* pass data through */
            default:
              ;
            }

          /* yank memory into little-endian order */
          for(i=0; i<4; i++)
            vu_new_data[i] = H2LE_4(vu_new_data[i]);

          /* write replacement word */
          ASSERT(sizeof(vu_new_data) == 16);
          PKE_MEM_WRITE(me, vu_addr,
                        vu_new_data,
                        16);

          /* write tracking address in target byte-order */
          source_addr = H2T_4(source_addr);
          ASSERT(sizeof(unsigned_4) == 4);
          PKE_MEM_WRITE(me, vutrack_addr,
                        & source_addr,
                        4);

          /* next vector please */
          vector_num_out ++;
        } /* vector transfer loop */
      while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + num_operands);
    } /* PKE FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_error(struct pke_device* me, unsigned_4 pkecode)
{
  /* set ER1 flag in STAT register */
  PKE_REG_MASK_SET(me, STAT, ER1, 1);
  /* advance over faulty word */
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
  pke_pc_advance(me, 1);
}