/* Copyright (C) 1998, Cygnus Solutions */


/* Debugging PKE? */
#define PKE_DEBUG


#include <stdio.h>   /* fopen, fprintf, sprintf, perror, setvbuf */
#include <stdlib.h>  /* getenv, realloc */
#include <string.h>  /* memcpy, memset */
#include "sky-pke.h"
#include "sky-dma.h"
#include "sim-bits.h"
#include "sim-assert.h"
#include "sky-vu0.h"
#include "sky-vu1.h"
#include "sky-gpuif.h"

/* Imported functions */

void device_error (device *me, char* message);  /* device.c */


/* Internal function declarations */

static int pke_io_read_buffer(device*, void*, int, address_word,
                              unsigned, sim_cpu*, sim_cia);
static int pke_io_write_buffer(device*, const void*, int, address_word,
                               unsigned, sim_cpu*, sim_cia);
static void pke_issue(SIM_DESC, struct pke_device*);
static void pke_pc_advance(struct pke_device*, int num_words);
static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num);
static unsigned_4 pke_pc_operand_bits(struct pke_device*, int bit_offset,
                                      int bit_width, unsigned_4* sourceaddr);
static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num,
                                         unsigned_4** operand);
static void pke_attach(SIM_DESC sd, struct pke_device* me);
enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 };
static int pke_check_stall(struct pke_device* me, enum pke_check_target what);
static void pke_flip_dbf(struct pke_device* me);

/* PKEcode handlers */
static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_base(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode);
static void pke_code_error(struct pke_device* me, unsigned_4 pkecode);

/* Static data */

struct pke_device pke0_device =
{
  { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  0, 0,              /* ID, flags */
  {},                /* regs */
  {}, 0,             /* FIFO write buffer */
  NULL, 0, 0, NULL,  /* FIFO */
  0, 0               /* pc */
};


struct pke_device pke1_device =
{
  { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  1, 0,              /* ID, flags */
  {},                /* regs */
  {}, 0,             /* FIFO write buffer */
  NULL, 0, 0, NULL,  /* FIFO */
  0, 0               /* pc */
};



/* External functions */


/* Attach PKE addresses to main memory */

void
pke0_attach(SIM_DESC sd)
{
  pke_attach(sd, & pke0_device);
}

void
pke1_attach(SIM_DESC sd)
{
  pke_attach(sd, & pke1_device);
}



/* Issue a PKE instruction if possible */

void
pke0_issue(SIM_DESC sd)
{
  pke_issue(sd, & pke0_device);
}

void
pke1_issue(SIM_DESC sd)
{
  pke_issue(sd, & pke1_device);
}


/* Internal functions */


/* Attach PKE memory regions to simulator */

void
pke_attach(SIM_DESC sd, struct pke_device* me)
{
  /* register file */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
                   PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* FIFO port */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
                   sizeof(quadword) /*nr_bytes*/,
                   0 /*modulo*/,
                   (device*) me,
                   NULL /*buffer*/);

  /* VU MEM0 tracking table */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START),
                   ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2,
                   0 /*modulo*/,
                   NULL,
                   NULL /*buffer*/);

  /* VU MEM1 tracking table */
  sim_core_attach (sd, NULL, 0, access_read_write, 0,
                   ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START),
                   ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4,
                   0 /*modulo*/,
                   NULL,
                   NULL /*buffer*/);


  /* attach to trace file if appropriate */
  {
    char trace_envvar[80];
    char* trace_filename = NULL;
    sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number);
    trace_filename = getenv(trace_envvar);
    if(trace_filename != NULL)
      {
        me->fifo_trace_file = fopen(trace_filename, "w");
        if(me->fifo_trace_file == NULL)
          perror("VIF FIFO trace error on fopen");
        else
          setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0);
      }
  }
}
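
/* Usage note (comment added for exposition): the trace hook above means
   that a run along the lines of

       VIF1_TRACE_FILE=/tmp/vif1.trace ./run my-program

   makes pke1_device log one line per consumed FIFO quadword; see the
   fprintf() call in pke_pc_advance() for the record layout.  The shell
   invocation is only an illustrative sketch, not a documented interface.  */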



/* Handle a PKE read; return no. of bytes read */

int
pke_io_read_buffer(device *me_,
                   void *dest,
                   int space,
                   address_word addr,
                   unsigned nr_bytes,
                   sim_cpu *cpu,
                   sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);  /* find byte-offset inside register bank */
      int readable = 1;
      quadword result;

      /* clear result */
      result[0] = result[1] = result[2] = result[3] = 0;

      /* handle reads to individual registers; clear `readable' on error */
      switch(reg_num)
        {
          /* handle common case of register reading, side-effect free */
          /* PKE1-only registers */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            readable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
        case PKE_REG_ERR:
        case PKE_REG_MARK:
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          result[0] = me->regs[reg_num][0];
          break;

          /* handle common case of write-only registers */
        case PKE_REG_FBRST:
          readable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform transfer & return */
      if(readable)
        {
          /* copy the bits */
          memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
          /* okay */
        }
      else
        {
          /* return zero bits */
          memset(dest, 0, nr_bytes);
        }

      return nr_bytes;
      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */

      /* FIFO is not readable: return a word of zeroes */
      memset(dest, 0, nr_bytes);
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}


/* Handle a PKE write; return no. of bytes written */

int
pke_io_write_buffer(device *me_,
                    const void *src,
                    int space,
                    address_word addr,
                    unsigned nr_bytes,
                    sim_cpu *cpu,
                    sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr);  /* find byte-offset inside register bank */
      int writeable = 1;
      quadword input;

      /* clear input */
      input[0] = input[1] = input[2] = input[3] = 0;

      /* write user-given bytes into input */
      memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);

      /* handle writes to individual registers; clear `writeable' on error */
      switch(reg_num)
        {
        case PKE_REG_FBRST:
          /* Order these tests from least to most overriding, in case
             multiple bits are set. */
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E))
            {
              /* clear a bunch of status bits */
              PKE_REG_MASK_SET(me, STAT, PSS, 0);
              PKE_REG_MASK_SET(me, STAT, PFS, 0);
              PKE_REG_MASK_SET(me, STAT, PIS, 0);
              PKE_REG_MASK_SET(me, STAT, INT, 0);
              PKE_REG_MASK_SET(me, STAT, ER0, 0);
              PKE_REG_MASK_SET(me, STAT, ER1, 0);
              me->flags &= ~PKE_FLAG_PENDING_PSS;
              /* will allow resumption of possible stalled instruction */
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E))
            {
              me->flags |= PKE_FLAG_PENDING_PSS;
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E))
            {
              PKE_REG_MASK_SET(me, STAT, PFS, 1);
            }
          if(BIT_MASK_GET(input[0], PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E))
            {
              /* clear FIFO by skipping to word after PC: also
                 prevents re-execution attempt of possible stalled
                 instruction */
              me->fifo_num_elements = me->fifo_pc;
              /* clear registers, flag, other state */
              memset(me->regs, 0, sizeof(me->regs));
              me->fifo_qw_done = 0;
              me->flags = 0;
              me->qw_pc = 0;
            }
          break;

        case PKE_REG_ERR:
          /* copy bottom three bits */
          BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
          break;

        case PKE_REG_MARK:
          /* copy bottom sixteen bits */
          PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
          /* reset MRK bit in STAT */
          PKE_REG_MASK_SET(me, STAT, MRK, 0);
          break;

          /* handle common case of read-only registers */
          /* PKE1-only registers - not really necessary to handle separately */
        case PKE_REG_BASE:
        case PKE_REG_OFST:
        case PKE_REG_TOPS:
        case PKE_REG_TOP:
        case PKE_REG_DBF:
          if(me->pke_number == 0)
            writeable = 0;
          /* fall through */
          /* PKE0 & PKE1 common registers */
        case PKE_REG_STAT:
          /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
        case PKE_REG_CYCLE:
        case PKE_REG_MODE:
        case PKE_REG_NUM:
        case PKE_REG_MASK:
        case PKE_REG_CODE:
        case PKE_REG_ITOPS:
        case PKE_REG_ITOP:
        case PKE_REG_R0:
        case PKE_REG_R1:
        case PKE_REG_R2:
        case PKE_REG_R3:
        case PKE_REG_C0:
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
          writeable = 0;
          break;

        default:
          ASSERT(0); /* test above should prevent this possibility */
        }

      /* perform return */
      if(! writeable)
        {
          ; /* write to a read-only register: silently ignored */
        }

      return nr_bytes;

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
          addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */
      struct fifo_quadword* fqw;
      int fifo_byte = ADDR_OFFSET_QW(addr);  /* find byte-offset inside fifo quadword */
      unsigned_4 dma_tag_present = 0;
      int i;

      /* collect potentially-partial quadword in write buffer */
      memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
      /* mark bytes written */
      for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
        BIT_MASK_SET(me->fifo_qw_done, i, i, 1);

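      /* Worked example (comment added for exposition): fifo_qw_done keeps
         one bit per byte of the 16-byte quadword.  Two 8-byte stores at
         offsets 0 and 8 set bits 0-7 and then 8-15; only once all sixteen
         bits equal BIT_MASK_BTW(0, 15) does the test below fall through
         and the completed quadword get appended to the FIFO.  */
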
      /* return if quadword not quite written yet */
      if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) !=
         BIT_MASK_BTW(0, sizeof(quadword)-1))
        return nr_bytes;

      /* all done - process quadword after clearing flag */
      BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0);

      /* ensure FIFO has enough elements */
      if(me->fifo_num_elements == me->fifo_buffer_size)
        {
          /* time to grow */
          int new_fifo_buffer_size = me->fifo_buffer_size + 20;
          void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(struct fifo_quadword));

          if(ptr == NULL)
            {
              /* oops, cannot enlarge FIFO any more */
              device_error(me_, "Cannot enlarge FIFO buffer\n");
              return 0;
            }

          me->fifo = ptr;
          me->fifo_buffer_size = new_fifo_buffer_size;
        }

      /* add new quadword at end of FIFO */
      fqw = & me->fifo[me->fifo_num_elements];
      fqw->word_class[0] = fqw->word_class[1] =
        fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
      memcpy((void*) fqw->data, me->fifo_qw_in_progress, sizeof(quadword));
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
                   & fqw->source_address, /* target endian */
                   4);
      fqw->source_address = T2H_4(fqw->source_address);
      PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
                   & dma_tag_present,
                   4);

      if(dma_tag_present)
        {
          /* lower two words are DMA tags */
          fqw->word_class[0] = fqw->word_class[1] = wc_dma;
        }

      me->fifo_num_elements++;

      /* set FQC to "1" as FIFO is now not empty */
      PKE_REG_MASK_SET(me, STAT, FQC, 1);

      /* okay */
      return nr_bytes;
    }

  /* NOTREACHED */
  return 0;
}



/* Issue & swallow next PKE opcode if possible/available */

void
pke_issue(SIM_DESC sd, struct pke_device* me)
{
  struct fifo_quadword* fqw;
  unsigned_4 fw;
  unsigned_4 cmd, intr, num;
  unsigned_4 imm;

  /* 1 -- test go / no-go for PKE execution */

  /* switch on STAT:PSS if PSS-pending and in idle state */
  if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) &&
     (me->flags & PKE_FLAG_PENDING_PSS) != 0)
    {
      me->flags &= ~PKE_FLAG_PENDING_PSS;
      PKE_REG_MASK_SET(me, STAT, PSS, 1);
    }

  /* check for stall/halt control bits */
  if(PKE_REG_MASK_GET(me, STAT, PFS) ||
     PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */
     /* PEW bit not a reason to keep stalling - it's re-checked below */
     /* PGW bit not a reason to keep stalling - it's re-checked below */
     /* maskable stall controls: ER0, ER1, PIS */
     (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
     (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
     (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
    {
      /* try again next cycle; no state change */
      return;
    }

  /* confirm availability of new quadword of PKE instructions */
  if(me->fifo_num_elements <= me->fifo_pc)
    return;


  /* 2 -- fetch PKE instruction */

  /* skip over DMA tag, if present */
  pke_pc_advance(me, 0);

  /* "fetch" instruction quadword and word */
  fqw = & me->fifo[me->fifo_pc];
  fw = fqw->data[me->qw_pc];

  /* store word in PKECODE register */
  me->regs[PKE_REG_CODE][0] = fw;


  /* 3 -- decode PKE instruction */

  /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
     so op-code is in top byte. */
  intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B,   PKE_OPCODE_I_E);
  cmd  = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  num  = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  imm  = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
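
  /* Worked example (comment added for exposition): per the layout above,
     I is bit 31, cmd occupies bits 30:24, num bits 23:16 and imm bits
     15:0.  A fetched word fw == 0x8142abcd therefore decodes to
     intr == 1, cmd == 0x01, num == 0x42 and imm == 0xabcd.  The sample
     value is arbitrary, chosen only to make each field visible in hex.  */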

  if(intr)
    {
      /* set INT flag in STAT register */
      PKE_REG_MASK_SET(me, STAT, INT, 1);
      /* XXX: send interrupt to 5900? */
    }

  /* decoding */
  if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE)
    PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);

  /* decode & execute */
  if(IS_PKE_CMD(cmd, PKENOP))
    pke_code_nop(me, fw);
  else if(IS_PKE_CMD(cmd, STCYCL))
    pke_code_stcycl(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
    pke_code_offset(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
    pke_code_base(me, fw);
  else if(IS_PKE_CMD(cmd, ITOP))
    pke_code_itop(me, fw);
  else if(IS_PKE_CMD(cmd, STMOD))
    pke_code_stmod(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3))
    pke_code_mskpath3(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMARK))
    pke_code_pkemark(me, fw);
  else if(IS_PKE_CMD(cmd, FLUSHE))
    pke_code_flushe(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
    pke_code_flush(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
    pke_code_flusha(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCAL))
    pke_code_pkemscal(me, fw);
  else if(IS_PKE_CMD(cmd, PKEMSCNT))
    pke_code_pkemscnt(me, fw);
  else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
    pke_code_pkemscalf(me, fw);
  else if(IS_PKE_CMD(cmd, STMASK))
    pke_code_stmask(me, fw);
  else if(IS_PKE_CMD(cmd, STROW))
    pke_code_strow(me, fw);
  else if(IS_PKE_CMD(cmd, STCOL))
    pke_code_stcol(me, fw);
  else if(IS_PKE_CMD(cmd, MPG))
    pke_code_mpg(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECT))
    pke_code_direct(me, fw);
  else if(IS_PKE_CMD(cmd, DIRECTHL))
    pke_code_directhl(me, fw);
  else if(IS_PKE_CMD(cmd, UNPACK))
    pke_code_unpack(me, fw);
  /* ... no other commands ... */
  else
    pke_code_error(me, fw);
}



/* advance the PC by given number of data words; update STAT/FQC
   field; assume FIFO is filled enough; classify passed-over words;
   write FIFO trace line */

void
pke_pc_advance(struct pke_device* me, int num_words)
{
  int num = num_words;
  struct fifo_quadword* fq = NULL;
  ASSERT(num_words >= 0);

  /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */

  while(1)
    {
      fq = & me->fifo[me->fifo_pc];

      /* skip over DMA tag words if present in word 0 or 1 */
      if(fq->word_class[me->qw_pc] == wc_dma)
        {
          /* skip by going around loop an extra time */
          num ++;
        }

      /* nothing left to skip / no DMA tag here */
      if(num == 0)
        break;

      /* one word skipped */
      num --;

      /* point to next word */
      me->qw_pc ++;
      if(me->qw_pc == 4)
        {
          me->qw_pc = 0;
          me->fifo_pc ++;

          /* trace the consumption of the FIFO quadword we just skipped over */
          /* fq still points to it */
          if(me->fifo_trace_file != NULL)
            {
              /* assert complete classification */
              ASSERT(fq->word_class[3] != wc_unknown);
              ASSERT(fq->word_class[2] != wc_unknown);
              ASSERT(fq->word_class[1] != wc_unknown);
              ASSERT(fq->word_class[0] != wc_unknown);

              /* print trace record */
              fprintf(me->fifo_trace_file,
                      "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n",
                      (me->pke_number == 0 ? 0 : 1),
                      (unsigned) fq->data[3], (unsigned) fq->data[2],
                      (unsigned) fq->data[1], (unsigned) fq->data[0],
                      (unsigned) fq->source_address,
                      fq->word_class[3], fq->word_class[2],
                      fq->word_class[1], fq->word_class[0]);
            }

          /* XXX: zap old entries in FIFO */
        } /* next quadword */
    }

  /* clear FQC if FIFO is now empty */
  if(me->fifo_num_elements == me->fifo_pc)
    {
      PKE_REG_MASK_SET(me, STAT, FQC, 0);
    }
  else /* annotate the word where the PC lands as a PKEcode */
    {
      fq = & me->fifo[me->fifo_pc];
      ASSERT(fq->word_class[me->qw_pc] == wc_pkecode ||
             fq->word_class[me->qw_pc] == wc_unknown);
      fq->word_class[me->qw_pc] = wc_pkecode;
    }
}



/* Return pointer to FIFO quadword containing given operand# in FIFO.
   `operand_num' starts at 1.  Return pointer to operand word in last
   argument, if non-NULL.  If FIFO is not full enough, return 0.
   Signal an ER0 indication upon skipping a DMA tag.  */

struct fifo_quadword*
pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand)
{
  int num = operand_num;
  int new_qw_pc, new_fifo_pc;
  struct fifo_quadword* fq = NULL;

  ASSERT(num > 0);

  /* snapshot current pointers */
  new_fifo_pc = me->fifo_pc;
  new_qw_pc = me->qw_pc;

  /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */

  do
    {
      /* one word skipped */
      num --;

      /* point to next word */
      new_qw_pc ++;
      if(new_qw_pc == 4)
        {
          new_qw_pc = 0;
          new_fifo_pc ++;
        }

      /* check for FIFO underflow */
      if(me->fifo_num_elements == new_fifo_pc)
        {
          fq = NULL;
          break;
        }

      /* skip over DMA tag words if present in word 0 or 1 */
      fq = & me->fifo[new_fifo_pc];
      if(fq->word_class[new_qw_pc] == wc_dma)
        {
          /* mismatch error! */
          PKE_REG_MASK_SET(me, STAT, ER0, 1);
          /* skip by going around loop an extra time */
          num ++;
        }
    }
  while(num > 0);

  /* return pointer to operand word itself */
  if(fq != NULL)
    {
      *operand = & fq->data[new_qw_pc];

      /* annotate the word where the pseudo-PC lands as a PKE operand */
      ASSERT(fq->word_class[new_qw_pc] == wc_pkedata ||
             fq->word_class[new_qw_pc] == wc_unknown);
      fq->word_class[new_qw_pc] = wc_pkedata;
    }

  return fq;
}
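
/* Worked example (comment added for exposition): with qw_pc == 0 pointing
   at a PKEcode in FIFO slot fifo_pc, operand_num == 1 names the very next
   data word (word 1 of the same quadword), operand_num == 3 the last word
   of that quadword, and operand_num == 4 the first word of the following
   FIFO quadword -- with any wc_dma-classified words skipped (and flagged
   via ER0) along the way.  */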


/* Return pointer to given operand# in FIFO.  `operand_num' starts at 1.
   If FIFO is not full enough, return 0.  Skip over DMA tags, but mark
   them as an error (ER0).  */

unsigned_4*
pke_pc_operand(struct pke_device* me, int operand_num)
{
  unsigned_4* operand = NULL;
  struct fifo_quadword* fifo_operand;

  fifo_operand = pke_pc_fifo(me, operand_num, & operand);

  if(fifo_operand == NULL)
    ASSERT(operand == NULL); /* pke_pc_fifo() ought to leave it untouched */

  return operand;
}


/* Return a bit-field extract of given operand# in FIFO, and its
   source-addr.  `bit_offset' starts at 0, referring to LSB after PKE
   instruction word.  Width must be >0, <=32.  Assume FIFO is full
   enough.  Skip over DMA tags, but mark them as an error (ER0).  */

unsigned_4
pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr)
{
  unsigned_4* word = NULL;
  unsigned_4 value;
  struct fifo_quadword* fifo_operand;
  int wordnumber, bitnumber;

  wordnumber = bit_offset/32;
  bitnumber = bit_offset%32;

  /* find operand word with bitfield */
  fifo_operand = pke_pc_fifo(me, wordnumber + 1, &word);
  ASSERT(word != NULL);

  /* extract bitfield from word */
  value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1);

  /* extract source addr from fifo word */
  *source_addr = fifo_operand->source_address;

  return value;
}
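
/* Worked example (comment added for exposition): a request for
   bit_offset == 37, bit_width == 12 maps to wordnumber == 1,
   bitnumber == 5, i.e. bits 5..16 of the second operand word after the
   PKEcode.  The extract never spans two words; the UNPACK shapes below
   are laid out so that each unit stays inside one 32-bit word.  */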



/* Check for stall conditions on indicated devices (path* only on
   PKE1); do not change status.  Return 0 iff no stall.  */

int
pke_check_stall(struct pke_device* me, enum pke_check_target what)
{
  int any_stall = 0;
  unsigned_4 cop2_stat, gpuif_stat;

  /* read status words */
  ASSERT(sizeof(unsigned_4) == 4);
  PKE_MEM_READ(me, (GIF_REG_STAT),
               & gpuif_stat,
               4);
  PKE_MEM_READ(me, (COP2_REG_STAT_ADDR),
               & cop2_stat,
               4);

  /* perform checks */
  if(what == chk_vu)
    {
      if(me->pke_number == 0)
        any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E);
      else /* if(me->pke_number == 1) */
        any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E);
    }
  else if(what == chk_path1) /* VU -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1)
        any_stall = 1;
    }
  else if(what == chk_path2) /* PKE -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2)
        any_stall = 1;
    }
  else if(what == chk_path3) /* DMA -> GPUIF */
    {
      if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3)
        any_stall = 1;
    }
  else
    {
      /* invalid what */
      ASSERT(0);
    }

  /* any stall reasons? */
  return any_stall;
}


/* flip the DBF bit; recompute TOPS, ITOP & TOP */
void
pke_flip_dbf(struct pke_device* me)
{
  /* flip DBF */
  PKE_REG_MASK_SET(me, DBF, DF,
                   PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
  PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
  /* compute new TOPS */
  PKE_REG_MASK_SET(me, TOPS, TOPS,
                   (PKE_REG_MASK_GET(me, BASE, BASE) +
                    (PKE_REG_MASK_GET(me, DBF, DF) *
                     PKE_REG_MASK_GET(me, OFST, OFFSET))));
  /* compute new ITOP and TOP */
  PKE_REG_MASK_SET(me, ITOP, ITOP,
                   PKE_REG_MASK_GET(me, ITOPS, ITOPS));
  PKE_REG_MASK_SET(me, TOP, TOP,
                   PKE_REG_MASK_GET(me, TOPS, TOPS));
}
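
/* Worked example (comment added for exposition): with BASE == 0x100 and
   OFST == 0x200, successive calls alternate DF between 1 and 0, so TOPS
   alternates between 0x100 + 0x200 == 0x300 and plain 0x100 -- the two
   halves of the double buffer.  ITOP latches the previously staged ITOPS
   value and TOP latches the freshly recomputed TOPS.  */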



/* PKEcode handler functions -- responsible for checking and
   confirming old stall conditions, executing pkecode, updating PC and
   status registers -- may assume being run on correct PKE unit */

void
pke_code_nop(struct pke_device* me, unsigned_4 pkecode)
{
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy immediate value into CYCLE reg */
  PKE_REG_MASK_SET(me, CYCLE, WL, BIT_MASK_GET(imm, 8, 15));
  PKE_REG_MASK_SET(me, CYCLE, CL, BIT_MASK_GET(imm, 0, 7));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}
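
/* Worked example (comment added for exposition): an STCYCL word with
   imm == 0x0104 stores WL == 1 (bits 8:15) and CL == 4 (bits 0:7).
   Since CL >= WL, a later UNPACK in this mode writes one vector per
   group of four output slots, leaving CL - WL == 3 slots untouched;
   see the address arithmetic in pke_code_unpack().  */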


void
pke_code_offset(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to OFFSET field */
  PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
  /* clear DBF bit */
  PKE_REG_MASK_SET(me, DBF, DF, 0);
  /* clear other DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, 0);
  /* set TOPS = BASE */
  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_base(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to BASE field */
  PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
  /* clear DBF bit */
  PKE_REG_MASK_SET(me, DBF, DF, 0);
  /* clear other DBF bit */
  PKE_REG_MASK_SET(me, STAT, DBF, 0);
  /* set TOPS = BASE */
  PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_itop(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 10 bits to ITOPS field */
  PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_stmod(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy bottom bits (0..2) to MODE register */
  PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2));
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode)
{
#if 0
  /* XXX: pending on patrickm support code */
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  unsigned_4 gif_mode;

  /* read old GIF control register */
  ASSERT(sizeof(unsigned_4) == 4);
  PKE_MEM_READ(me, GIF_REG_MODE, & gif_mode, 4);

  /* mask appropriate bit */
  if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0)
    gif_mode |= GIF_REG_MODE_M3R_MASK;
  else
    gif_mode &= ~GIF_REG_MODE_M3R_MASK;

  /* write back modified register */
  PKE_MEM_WRITE(me, GIF_REG_MODE, & gif_mode, 4);
#endif

  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  /* copy 16 bits to MARK register */
  PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
  /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
  PKE_REG_MASK_SET(me, STAT, MRK, 1);
  /* done */
  pke_pc_advance(me, 1);
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
}


void
pke_code_flushe(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_flush(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);


  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  /* go or no go */
  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      /* all idle */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_flusha(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);


  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2) ||
     pke_check_stall(me, chk_path3))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      /* all idle */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;
      int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* compute new PC for VU */
      vu_pc = BIT_MASK_GET(imm, 0, 15);

      /* write new PC; callback function gets VU running */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                    & vu_pc,
                    4);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}



void
pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode)
{
  /* compute next PEW bit */
  if(pke_check_stall(me, chk_vu))
    {
      /* VU busy */
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;

      /* VU idle */
      PKE_REG_MASK_SET(me, STAT, PEW, 0);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* read old PC */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_READ(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                   & vu_pc,
                   4);

      /* rewrite new PC; callback function gets VU running */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                    & vu_pc,
                    4);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
{
  int something_busy = 0;

  /* compute next PEW, PGW bits */
  if(pke_check_stall(me, chk_vu))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PEW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PEW, 0);


  if(pke_check_stall(me, chk_path1) ||
     pke_check_stall(me, chk_path2) ||
     pke_check_stall(me, chk_path3))
    {
      something_busy = 1;
      PKE_REG_MASK_SET(me, STAT, PGW, 1);
    }
  else
    PKE_REG_MASK_SET(me, STAT, PGW, 0);

  /* go or no go */
  if(something_busy)
    {
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
  else
    {
      unsigned_4 vu_pc;
      int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

      /* flip DBF on PKE1 */
      if(me->pke_number == 1)
        pke_flip_dbf(me);

      /* compute new PC for VU */
      vu_pc = BIT_MASK_GET(imm, 0, 15);

      /* rewrite new PC; callback function gets VU running */
      ASSERT(sizeof(unsigned_4) == 4);
      PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA),
                    & vu_pc,
                    4);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1);
    }
}


void
pke_code_stmask(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has one more word for STMASK operand */
  unsigned_4* mask;

  mask = pke_pc_operand(me, 1);
  if(mask != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* fill the register */
      PKE_REG_MASK_SET(me, MASK, MASK, *mask);

      /* clear NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 2);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_strow(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has four more words for STROW operand */
  unsigned_4* last_op;

  last_op = pke_pc_operand(me, 4);
  if(last_op != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* copy ROW registers: must all exist if 4th operand exists */
      me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
      me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
      me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
      me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);

      /* clear NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 5);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_stcol(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has four more words for STCOL operand */
  unsigned_4* last_op;

  last_op = pke_pc_operand(me, 4);
  if(last_op != NULL)
    {
      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 1);

      /* copy COL registers: must all exist if 4th operand exists */
      me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
      me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
      me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
      me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);

      /* clear NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 5);
    }
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* try again next cycle */
    }
}


void
pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
{
  unsigned_4* last_mpg_word;
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* assert 64-bit alignment of MPG operand */
  if(me->qw_pc != 3 && me->qw_pc != 1)
    {
      pke_code_error(me, pkecode);
      return;
    }

  /* map zero to max+1 */
  if(num==0) num=0x100;

  /* check that FIFO has a few more words for MPG operand */
  last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
  if(last_mpg_word != NULL)
    {
      /* perform implied FLUSHE */
      if(pke_check_stall(me, chk_vu))
        {
          /* VU busy */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL);
          /* retry this instruction next clock */
        }
      else
        {
          /* VU idle */
          int i;

          /* "transferring" operand */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

          /* set NUM */
          PKE_REG_MASK_SET(me, NUM, NUM, num);

          /* transfer VU instructions, one word-pair per iteration */
          for(i=0; i<num; i++)
            {
              address_word vu_addr_base, vu_addr;
              address_word vutrack_addr_base, vutrack_addr;
              unsigned_4 vu_lower_opcode, vu_upper_opcode;
              unsigned_4* operand;
              unsigned_4 source_addr;
              struct fifo_quadword* fq;
              int next_num;

              /* decrement NUM */
              next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
              PKE_REG_MASK_SET(me, NUM, NUM, next_num);

              /* imm: in 64-bit units for MPG instruction */
              /* VU*_MEM0 : instruction memory */
              vu_addr_base = (me->pke_number == 0) ?
                VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START;
              vu_addr = vu_addr_base + (imm + i) * 8;

              /* XXX: overflow check! */

              /* VU*_MEM0_TRACK : source-addr tracking table */
              vutrack_addr_base = (me->pke_number == 0) ?
                VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
              vutrack_addr = vutrack_addr_base + (imm + i) * 4;

              /* Fetch operand words; assume they are already little-endian for VU imem */
              fq = pke_pc_fifo(me, i*2 + 1, & operand);
              vu_lower_opcode = *operand;
              vu_upper_opcode = *pke_pc_operand(me, i*2 + 2);

              /* write data into VU memory */
              /* lower (scalar) opcode comes in first word */
              PKE_MEM_WRITE(me, vu_addr,
                            & vu_lower_opcode,
                            4);
              /* upper (vector) opcode comes in second word */
              ASSERT(sizeof(unsigned_4) == 4);
              PKE_MEM_WRITE(me, vu_addr + 4,
                            & vu_upper_opcode,
                            4);

              /* write tracking address in target byte-order */
              source_addr = H2T_4(fq->source_address);
              ASSERT(sizeof(unsigned_4) == 4);
              PKE_MEM_WRITE(me, vutrack_addr,
                            & source_addr,
                            4);
            } /* VU xfer loop */

          /* check NUM */
          ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0);

          /* done */
          PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
          pke_pc_advance(me, 1 + num*2);
        }
    } /* if FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}
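
/* Worked example (comment added for exposition): MPG with num == 2 and
   imm == 0x10 consumes four operand words -- two (lower, upper) opcode
   pairs -- and writes them to VU instruction memory at byte offsets
   0x10*8 and 0x11*8 from the window base, recording each pair's source
   address in the MEM0 tracking table at offsets 0x10*4 and 0x11*4.  */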


void
pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
{
  /* check that FIFO has a few more words for DIRECT operand */
  unsigned_4* last_direct_word;
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);

  /* assert 128-bit alignment of DIRECT operand */
  if(me->qw_pc != 3)
    {
      pke_code_error(me, pkecode);
      return;
    }

  /* map zero to max+1 */
  if(imm==0) imm=0x10000;

  last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit words */
  if(last_direct_word != NULL)
    {
      /* operands available */
      int i;
      quadword fifo_data;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* transfer GPUIF quadwords, one word per iteration */
      for(i=0; i<imm*4; i++)
        {
          unsigned_4* operand = pke_pc_operand(me, 1+i);

          /* collect word into quadword */
          fifo_data[i % 4] = *operand;

          /* write to GPUIF FIFO only with full quadword */
          if(i % 4 == 3)
            {
              ASSERT(sizeof(fifo_data) == 16);
              PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR,
                            fifo_data,
                            16);
            } /* write collected quadword */

        } /* GPUIF xfer loop */

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + imm*4);
    } /* if FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_directhl(struct pke_device* me, unsigned_4 pkecode)
{
  /* treat the same as DIRECT */
  pke_code_direct(me, pkecode);
}


void
pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
{
  int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
  int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
  int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
  short vn = BIT_MASK_GET(cmd, 2, 3);   /* unpack shape controls */
  short vl = BIT_MASK_GET(cmd, 0, 1);
  int m = BIT_MASK_GET(cmd, 4, 4);
  short cl = PKE_REG_MASK_GET(me, CYCLE, CL);  /* cycle controls */
  short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
  int r = BIT_MASK_GET(imm, 15, 15);    /* indicator bits in imm value */
  int usn = BIT_MASK_GET(imm, 14, 14);

  int n, num_operands;
  unsigned_4* last_operand_word = NULL;

  /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
  if(wl <= cl)
    n = num;
  else
    n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
  num_operands = ((32 >> vl) * (vn+1) * n)/32;

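  /* Worked example (comment added for exposition): for a V4_32 unpack
     (vl == 0, vn == 3) of num == 8 vectors with wl <= cl (plain cycle),
     n == 8 and num_operands == (32 * 4 * 8)/32 == 32 FIFO words.  With
     cl == 1, wl == 4 instead, only cl of every wl vectors arrive in the
     stream: n == 1*(8/4) + PKE_LIMIT(8%4, 1) == 2, so just 8 operand
     words are fetched and the missing vectors take the zero-filled
     "indeterminate" path below.  */
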
  /* confirm that FIFO has enough words in it */
  if(num_operands > 0)
    last_operand_word = pke_pc_operand(me, num_operands);
  if(last_operand_word != NULL || num_operands == 0)
    {
      address_word vu_addr_base, vutrack_addr_base;
      address_word vu_addr_max_size;
      int vector_num_out, vector_num_in;

      /* "transferring" operand */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);

      /* don't check whether VU is idle */

      /* compute VU address base */
      if(me->pke_number == 0)
        {
          vu_addr_base = VU0_MEM1_WINDOW_START + 16 * BIT_MASK_GET(imm, 0, 9);
          vu_addr_max_size = VU0_MEM1_SIZE;
          vutrack_addr_base = VU0_MEM1_SRCADDR_START + 4 * BIT_MASK_GET(imm, 0, 9);
        }
      else
        {
          vu_addr_base = VU1_MEM1_WINDOW_START + 16 * BIT_MASK_GET(imm, 0, 9);
          vu_addr_max_size = VU1_MEM1_SIZE;
          vutrack_addr_base = VU1_MEM1_SRCADDR_START + 4 * BIT_MASK_GET(imm, 0, 9);
          if(r) /* double-buffering */
            {
              vu_addr_base += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS);
              vutrack_addr_base += 4 * PKE_REG_MASK_GET(me, TOPS, TOPS);
            }
        }


      /* set NUM */
      PKE_REG_MASK_SET(me, NUM, NUM, num == 0 ? 0x100 : num );

      /* transfer given number of vectors */
      vector_num_out = 0;  /* output vector number being processed */
      vector_num_in = 0;   /* argument vector number being processed */
      do
        {
          quadword vu_old_data;
          quadword vu_new_data;
          quadword unpacked_data;
          address_word vu_addr;
          address_word vutrack_addr;
          unsigned_4 source_addr = 0;
          int i;
          int next_num;

          /* decrement NUM */
          next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1;
          PKE_REG_MASK_SET(me, NUM, NUM, next_num);

          /* compute VU destination address, as bytes in R5900 memory */
          if(cl >= wl)
            {
              /* map zero to max+1 */
              int addrwl = (wl == 0) ? 0x0100 : wl;
              vu_addr = vu_addr_base + 16*(cl*(vector_num_out/addrwl) + (vector_num_out%addrwl));
            }
          else
            vu_addr = vu_addr_base + 16*vector_num_out;

          /* check for vu_addr overflow */
          while(vu_addr >= vu_addr_base + vu_addr_max_size)
            vu_addr -= vu_addr_max_size;

          /* compute address of tracking table entry */
          vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;

          /* read old VU data word at address */
          ASSERT(sizeof(vu_old_data) == 16);
          PKE_MEM_READ(me, vu_addr,
                       vu_old_data,
                       16);

          /* yank memory out of little-endian order */
          for(i=0; i<4; i++)
            vu_old_data[i] = LE2H_4(vu_old_data[i]);

          /* For cyclic unpack, next operand quadword may come from instruction stream
             or be zero. */
          if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */
             ((cl < wl) && ((vector_num_out % wl) >= cl))) /* wl != 0, set above */
            {
              /* clear operand - used only in an "indeterminate" state */
              for(i = 0; i < 4; i++)
                unpacked_data[i] = 0;
            }
          else
            {
              /* compute packed vector dimensions */
              int vectorbits, unitbits;

              if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */
                {
                  unitbits = (32 >> vl);
                  vectorbits = unitbits * (vn+1);
                }
              else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */
                {
                  unitbits = 5;
                  vectorbits = 16;
                }
              else /* illegal unpack variant */
                {
                  /* treat as illegal instruction */
                  pke_code_error(me, pkecode);
                  return;
                }

              /* loop over columns */
              for(i=0; i<=vn; i++)
                {
                  unsigned_4 operand;

                  /* offset in bits in current operand word */
                  int bitoffset =
                    (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */

                  /* last unit of V4_5 is only one bit wide */
                  if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */
                    unitbits = 1;

                  /* fetch bitfield operand */
                  operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr);

                  /* selectively sign-extend; not for V4_5 1-bit value */
                  if(usn || unitbits == 1)
                    unpacked_data[i] = operand;
                  else
                    unpacked_data[i] = SEXT32(operand, unitbits-1);
                }

              /* consumed a vector from the PKE instruction stream */
              vector_num_in ++;
            } /* unpack word from instruction operand */

          /* compute replacement word */
          if(m) /* use mask register? */
            {
              /* compute index into mask register for this word */
              int addrwl = (wl == 0) ? 0x0100 : wl;
              int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3);

              for(i=0; i<4; i++) /* loop over columns */
                {
                  int mask_op = PKE_MASKREG_GET(me, mask_index, i);
                  unsigned_4* masked_value = NULL;
                  unsigned_4 zero = 0;

                  switch(mask_op)
                    {
                    case PKE_MASKREG_INPUT:
                      /* for vn == 0, all columns are copied from column 0 */
                      if(vn == 0)
                        masked_value = & unpacked_data[0];
                      else if(i > vn)
                        masked_value = & zero; /* arbitrary data: undefined in spec */
                      else
                        masked_value = & unpacked_data[i];
                      break;

                    case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
                      masked_value = & me->regs[PKE_REG_R0 + i][0];
                      break;

                    case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
                      masked_value = & me->regs[PKE_REG_C0 + mask_index][0];
                      break;

                    case PKE_MASKREG_NOTHING:
                      /* "write inhibit" by re-copying old data */
                      masked_value = & vu_old_data[i];
                      break;

                    default:
                      ASSERT(0);
                      /* no other cases possible */
                    }

                  /* copy masked value for column */
                  vu_new_data[i] = *masked_value;
                } /* loop over columns */
            } /* mask */
          else
            {
              /* no mask - just copy over entire unpacked quadword */
              memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
            }

          /* process STMOD register for accumulation operations */
          switch(PKE_REG_MASK_GET(me, MODE, MDE))
            {
            case PKE_MODE_ADDROW: /* add row registers to output data */
              for(i=0; i<4; i++)
                /* exploit R0..R3 contiguity */
                vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
              break;

            case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
              for(i=0; i<4; i++)
                {
                  /* exploit R0..R3 contiguity */
                  vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
                  me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
                }
              break;

            case PKE_MODE_INPUT: /* pass data through */
            default:
              ;
            }

          /* yank memory into little-endian order */
          for(i=0; i<4; i++)
            vu_new_data[i] = H2LE_4(vu_new_data[i]);

          /* write replacement word */
          ASSERT(sizeof(vu_new_data) == 16);
          PKE_MEM_WRITE(me, vu_addr,
                        vu_new_data,
                        16);

          /* write tracking address in target byte-order */
          source_addr = H2T_4(source_addr);
          ASSERT(sizeof(unsigned_4) == 4);
          PKE_MEM_WRITE(me, vutrack_addr,
                        & source_addr,
                        4);

          /* next vector please */
          vector_num_out ++;
        } /* vector transfer loop */
      while(PKE_REG_MASK_GET(me, NUM, NUM) > 0);

      /* done */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
      pke_pc_advance(me, 1 + num_operands);
    } /* PKE FIFO full enough */
  else
    {
      /* need to wait for another word */
      PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT);
      /* retry this instruction next clock */
    }
}


void
pke_code_error(struct pke_device* me, unsigned_4 pkecode)
{
  /* set ER1 flag in STAT register */
  PKE_REG_MASK_SET(me, STAT, ER1, 1);
  /* advance over faulty word */
  PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE);
  pke_pc_advance(me, 1);
}