Commit | Line | Data |
---|---|---|
aea481da DE |
1 | /* Copyright (C) 1998, Cygnus Solutions */ |
2 | ||
aea481da | 3 | #include <stdlib.h> |
fba9bfed FCE |
4 | #include "sky-pke.h" |
5 | #include "sky-dma.h" | |
6 | #include "sim-assert.h" | |
7 | #include "sky-vu0.h" | |
8 | #include "sky-vu1.h" | |
9 | #include "sky-gpuif.h" | |
aea481da | 10 | |
aea481da DE |
11 | /* Imported functions */ |
12 | ||
13 | void device_error (device *me, char* message); /* device.c */ | |
14 | ||
15 | ||
16 | /* Internal function declarations */ | |
17 | ||
18 | static int pke_io_read_buffer(device*, void*, int, address_word, | |
19 | unsigned, sim_cpu*, sim_cia); | |
20 | static int pke_io_write_buffer(device*, const void*, int, address_word, | |
21 | unsigned, sim_cpu*, sim_cia); | |
22 | static void pke_issue(struct pke_device*); | |
fba9bfed FCE |
23 | static void pke_pc_advance(struct pke_device*, int num_words); |
24 | static unsigned_4* pke_pc_operand(struct pke_device*, int word_num); | |
25 | static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num); | |
26 | static int pke_track_write(struct pke_device*, const void* src, int len, | |
27 | address_word dest, unsigned_4 sourceaddr); | |
28 | static void pke_attach(SIM_DESC sd, struct pke_device* me); | |
29 | ||
aea481da DE |
30 | |
31 | ||
32 | /* Static data */ | |
33 | ||
/* PKE0 device instance.  Fields (in declaration order): embedded
   generic `device' header with name and I/O callbacks; unit ID and
   flags; register file (zero-initialized); FIFO state (buffer pointer,
   buffer size, element count, extra pointer -- all empty at startup);
   and the two program-counter fields, both starting at 0.  */
struct pke_device pke0_device =
{
  { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  0, 0,               /* ID, flags */
  {},                 /* regs */
  NULL, 0, 0, NULL,   /* FIFO */
  0, 0                /* pc */
};
42 | ||
43 | ||
/* PKE1 device instance.  Identical layout to pke0_device except for
   the device name and unit ID (1).  */
struct pke_device pke1_device =
{
  { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
  1, 0,               /* ID, flags */
  {},                 /* regs */
  NULL, 0, 0, NULL,   /* FIFO */
  0, 0                /* pc */
};
52 | ||
53 | ||
54 | ||
55 | /* External functions */ | |
56 | ||
57 | ||
fba9bfed | 58 | /* Attach PKE addresses to main memory */ |
aea481da DE |
59 | |
60 | void | |
61 | pke0_attach(SIM_DESC sd) | |
62 | { | |
fba9bfed FCE |
63 | pke_attach(sd, & pke0_device); |
64 | } | |
aea481da | 65 | |
fba9bfed FCE |
66 | void |
67 | pke1_attach(SIM_DESC sd) | |
68 | { | |
69 | pke_attach(sd, & pke1_device); | |
aea481da DE |
70 | } |
71 | ||
72 | ||
fba9bfed FCE |
73 | |
74 | /* Issue a PKE instruction if possible */ | |
aea481da DE |
75 | |
76 | void | |
52793fab | 77 | pke0_issue(void) |
aea481da | 78 | { |
fba9bfed FCE |
79 | pke_issue(& pke0_device); |
80 | } | |
81 | ||
82 | void | |
52793fab | 83 | pke1_issue(void) |
fba9bfed FCE |
84 | { |
85 | pke_issue(& pke0_device); | |
86 | } | |
87 | ||
88 | ||
89 | ||
90 | /* Internal functions */ | |
91 | ||
92 | ||
93 | /* Attach PKE memory regions to simulator */ | |
94 | ||
95 | void | |
96 | pke_attach(SIM_DESC sd, struct pke_device* me) | |
97 | { | |
98 | /* register file */ | |
aea481da DE |
99 | sim_core_attach (sd, |
100 | NULL, | |
101 | 0 /*level*/, | |
102 | access_read_write, | |
103 | 0 /*space ???*/, | |
fba9bfed | 104 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START, |
aea481da DE |
105 | PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, |
106 | 0 /*modulo*/, | |
fba9bfed | 107 | (device*) &pke0_device, |
aea481da DE |
108 | NULL /*buffer*/); |
109 | ||
fba9bfed | 110 | /* FIFO port */ |
aea481da DE |
111 | sim_core_attach (sd, |
112 | NULL, | |
113 | 0 /*level*/, | |
114 | access_read_write, | |
115 | 0 /*space ???*/, | |
fba9bfed | 116 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR, |
aea481da DE |
117 | sizeof(quadword) /*nr_bytes*/, |
118 | 0 /*modulo*/, | |
119 | (device*) &pke1_device, | |
120 | NULL /*buffer*/); | |
aea481da | 121 | |
fba9bfed FCE |
122 | /* source-addr tracking word */ |
123 | sim_core_attach (sd, | |
124 | NULL, | |
125 | 0 /*level*/, | |
126 | access_read_write, | |
127 | 0 /*space ???*/, | |
128 | (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, | |
129 | sizeof(unsigned_4) /*nr_bytes*/, | |
130 | 0 /*modulo*/, | |
131 | NULL, | |
132 | zalloc(sizeof(unsigned_4)) /*buffer*/); | |
aea481da DE |
133 | } |
134 | ||
135 | ||
136 | ||
aea481da DE |
/* Handle a PKE read; return no. of bytes read (0 on error).

   Reads are classified by address: the register window yields the
   requested bytes of the (32-bit) register padded to a zeroed
   quadword; the FIFO port is write-only and reads back as zeroes.
   Accesses that straddle a quadword boundary are rejected.  */

int
pke_io_read_buffer(device *me_,
		   void *dest,
		   int space,
		   address_word addr,
		   unsigned nr_bytes,
		   sim_cpu *cpu,
		   sim_cia cia)
{
  /* downcast to gather embedding pke_device struct */
  struct pke_device* me = (struct pke_device*) me_;

  /* find my address ranges */
  address_word my_reg_start =
    (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
  address_word my_fifo_addr =
    (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;

  /* enforce that an access does not span more than one quadword */
  address_word low = ADDR_TRUNC_QW(addr);
  address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
  if(low != high)
    return 0;

  /* classify address & handle */
  if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
    {
      /* register bank: one 16-byte (quadword) slot per register */
      int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
      int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
      int readable = 1;
      quadword result;

      /* clear result: only word 0 is ever populated below, so the
	 upper three words of a register slot read back as zero */
      result[0] = result[1] = result[2] = result[3] = 0;

      /* handle reads to individual registers; clear `readable' on error */
      switch(reg_num)
	{
	  /* handle common case of register reading, side-effect free */
	  /* PKE1-only registers: reading them on PKE0 is an error,
	     hence the conditional before the fall-through */
	case PKE_REG_BASE:
	case PKE_REG_OFST:
	case PKE_REG_TOPS:
	case PKE_REG_TOP:
	case PKE_REG_DBF:
	  if(me->pke_number == 0)
	    readable = 0;
	  /* fall through */
	  /* PKE0 & PKE1 common registers */
	case PKE_REG_STAT:
	case PKE_REG_ERR:
	case PKE_REG_MARK:
	case PKE_REG_CYCLE:
	case PKE_REG_MODE:
	case PKE_REG_NUM:
	case PKE_REG_MASK:
	case PKE_REG_CODE:
	case PKE_REG_ITOPS:
	case PKE_REG_ITOP:
	case PKE_REG_R0:
	case PKE_REG_R1:
	case PKE_REG_R2:
	case PKE_REG_R3:
	case PKE_REG_C0:
	case PKE_REG_C1:
	case PKE_REG_C2:
	case PKE_REG_C3:
	  result[0] = me->regs[reg_num][0];
	  break;

	  /* handle common case of write-only registers */
	case PKE_REG_FBRST:
	  readable = 0;
	  break;

	default:
	  ASSERT(0); /* test above should prevent this possibility */
	}

      /* perform transfer & return */
      if(readable)
	{
	  /* copy the bits: reg_byte selects the slice of the quadword
	     slot corresponding to the sub-quadword access */
	  memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
	  /* okay */
	  return nr_bytes;
	}
      else
	{
	  /* error */
	  return 0;
	}

      /* NOTREACHED */
    }
  else if(addr >= my_fifo_addr &&
	  addr < my_fifo_addr + sizeof(quadword))
    {
      /* FIFO */

      /* FIFO is not readable: return a word of zeroes */
      memset(dest, 0, nr_bytes);
      return nr_bytes;
    }

  /* address matched neither the register window nor the FIFO port */
  return 0;
}
248 | ||
249 | ||
250 | /* Handle a PKE read; return no. of bytes written */ | |
251 | ||
252 | int | |
253 | pke_io_write_buffer(device *me_, | |
254 | const void *src, | |
255 | int space, | |
256 | address_word addr, | |
257 | unsigned nr_bytes, | |
fba9bfed | 258 | sim_cpu *cpu, |
aea481da DE |
259 | sim_cia cia) |
260 | { | |
261 | /* downcast to gather embedding pke_device struct */ | |
262 | struct pke_device* me = (struct pke_device*) me_; | |
263 | ||
fba9bfed FCE |
264 | /* find my address ranges */ |
265 | address_word my_reg_start = | |
266 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; | |
267 | address_word my_fifo_addr = | |
268 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; | |
269 | ||
aea481da DE |
270 | /* enforce that an access does not span more than one quadword */ |
271 | address_word low = ADDR_TRUNC_QW(addr); | |
272 | address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); | |
273 | if(low != high) | |
274 | return 0; | |
275 | ||
276 | /* classify address & handle */ | |
fba9bfed | 277 | if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) |
aea481da DE |
278 | { |
279 | /* register bank */ | |
fba9bfed FCE |
280 | int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; |
281 | int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ | |
aea481da | 282 | int writeable = 1; |
fba9bfed FCE |
283 | quadword input; |
284 | ||
285 | /* clear input */ | |
286 | input[0] = input[1] = input[2] = input[3] = 0; | |
aea481da | 287 | |
fba9bfed FCE |
288 | /* write user-given bytes into input */ |
289 | memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); | |
290 | ||
291 | /* handle writes to individual registers; clear `writeable' on error */ | |
aea481da DE |
292 | switch(reg_num) |
293 | { | |
fba9bfed FCE |
294 | case PKE_REG_FBRST: |
295 | /* XXX: order of evaluation? STP && STC ?? */ | |
296 | if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */ | |
297 | { | |
298 | /* clear FIFO: also prevents re-execution attempt of | |
299 | possible stalled instruction */ | |
300 | me->fifo_num_elements = me->fifo_pc; | |
301 | /* clear registers */ | |
302 | memset(me->regs, 0, sizeof(me->regs)); | |
303 | me->flags = 0; | |
304 | me->qw_pc = 0; | |
305 | } | |
306 | if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */ | |
307 | { | |
308 | PKE_REG_MASK_SET(me, STAT, PFS, 1); | |
309 | } | |
310 | if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */ | |
311 | { | |
312 | /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */ | |
313 | PKE_REG_MASK_SET(me, STAT, PSS, 1); | |
314 | } | |
315 | if(BIT_MASK_GET(input[0], 2, 2)) /* STC bit */ | |
316 | { | |
317 | /* clear a bunch of status bits */ | |
318 | PKE_REG_MASK_SET(me, STAT, PSS, 0); | |
319 | PKE_REG_MASK_SET(me, STAT, PFS, 0); | |
320 | PKE_REG_MASK_SET(me, STAT, PIS, 0); | |
321 | PKE_REG_MASK_SET(me, STAT, INT, 0); | |
322 | PKE_REG_MASK_SET(me, STAT, ER0, 0); | |
323 | PKE_REG_MASK_SET(me, STAT, ER1, 0); | |
324 | /* will allow resumption of possible stalled instruction */ | |
325 | } | |
326 | break; | |
327 | ||
328 | case PKE_REG_ERR: | |
329 | /* copy bottom three bits */ | |
330 | BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2)); | |
331 | break; | |
332 | ||
333 | case PKE_REG_MARK: | |
334 | /* copy bottom sixteen bits */ | |
335 | PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15)); | |
336 | /* reset MRK bit in STAT */ | |
337 | PKE_REG_MASK_SET(me, STAT, MRK, 0); | |
338 | break; | |
339 | ||
340 | /* handle common case of read-only registers */ | |
341 | /* PKE1-only registers - not really necessary to handle separately */ | |
aea481da DE |
342 | case PKE_REG_BASE: |
343 | case PKE_REG_OFST: | |
344 | case PKE_REG_TOPS: | |
345 | case PKE_REG_TOP: | |
346 | case PKE_REG_DBF: | |
fba9bfed | 347 | if(me->pke_number == 0) |
aea481da | 348 | writeable = 0; |
fba9bfed FCE |
349 | /* fall through */ |
350 | /* PKE0 & PKE1 common registers*/ | |
351 | case PKE_REG_STAT: | |
352 | /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ | |
353 | case PKE_REG_CYCLE: | |
354 | case PKE_REG_MODE: | |
355 | case PKE_REG_NUM: | |
356 | case PKE_REG_MASK: | |
357 | case PKE_REG_CODE: | |
358 | case PKE_REG_ITOPS: | |
359 | case PKE_REG_ITOP: | |
360 | case PKE_REG_R0: | |
361 | case PKE_REG_R1: | |
362 | case PKE_REG_R2: | |
363 | case PKE_REG_R3: | |
364 | case PKE_REG_C0: | |
365 | case PKE_REG_C1: | |
366 | case PKE_REG_C2: | |
367 | case PKE_REG_C3: | |
368 | writeable = 0; | |
369 | break; | |
370 | ||
371 | default: | |
372 | ASSERT(0); /* test above should prevent this possibility */ | |
aea481da DE |
373 | } |
374 | ||
fba9bfed FCE |
375 | /* perform return */ |
376 | if(writeable) | |
aea481da | 377 | { |
fba9bfed | 378 | /* okay */ |
aea481da DE |
379 | return nr_bytes; |
380 | } | |
381 | else | |
382 | { | |
383 | /* error */ | |
384 | return 0; | |
385 | } | |
386 | ||
387 | /* NOTREACHED */ | |
388 | } | |
fba9bfed FCE |
389 | else if(addr >= my_fifo_addr && |
390 | addr < my_fifo_addr + sizeof(quadword)) | |
aea481da DE |
391 | { |
392 | /* FIFO */ | |
fba9bfed | 393 | struct fifo_quadword* fqw; |
aea481da DE |
394 | |
395 | /* assert transfer size == 128 bits */ | |
396 | if(nr_bytes != sizeof(quadword)) | |
397 | return 0; | |
398 | ||
399 | /* ensure FIFO has enough elements */ | |
400 | if(me->fifo_num_elements == me->fifo_buffer_size) | |
401 | { | |
402 | /* time to grow */ | |
403 | int new_fifo_buffer_size = me->fifo_buffer_size + 20; | |
404 | void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(quadword)); | |
405 | ||
406 | if(ptr == NULL) | |
407 | { | |
408 | /* oops, cannot enlarge FIFO any more */ | |
409 | device_error(me_, "Cannot enlarge FIFO buffer\n"); | |
410 | return 0; | |
411 | } | |
412 | ||
413 | me->fifo_buffer_size = new_fifo_buffer_size; | |
414 | } | |
415 | ||
416 | /* add new quadword at end of FIFO */ | |
fba9bfed FCE |
417 | fqw = & me->fifo[me->fifo_num_elements]; |
418 | memcpy((void*) fqw->data, src, nr_bytes); | |
419 | sim_read(CPU_STATE(cpu), | |
420 | (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_SRCADDR : DMA_CHANNEL1_SRCADDR), | |
421 | (void*) & fqw->source_address, | |
422 | sizeof(address_word)); | |
423 | sim_read(CPU_STATE(cpu), | |
424 | (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG), | |
425 | (void*) & fqw->dma_tag_present, | |
426 | sizeof(unsigned_4)); | |
427 | /* XXX: check RC */ | |
428 | ||
429 | me->fifo_num_elements++; | |
430 | ||
431 | /* set FQC to "1" as FIFO is now not empty */ | |
432 | PKE_REG_MASK_SET(me, STAT, FQC, 1); | |
aea481da DE |
433 | |
434 | /* okay */ | |
435 | return nr_bytes; | |
436 | } | |
437 | ||
438 | /* NOTREACHED */ | |
fba9bfed | 439 | return 0; |
aea481da DE |
440 | } |
441 | ||
442 | ||
443 | ||
fba9bfed | 444 | /* Issue & swallow next PKE opcode if possible/available */ |
aea481da DE |
445 | |
446 | void | |
447 | pke_issue(struct pke_device* me) | |
448 | { | |
fba9bfed FCE |
449 | struct fifo_quadword* fqw; |
450 | unsigned_4 fw; | |
451 | unsigned_4 cmd, intr, num; | |
452 | unsigned_4 imm; | |
453 | int next_pps_state; /* PPS after this instruction issue attempt */ | |
454 | ||
455 | /* 1 -- test go / no-go for PKE execution */ | |
456 | ||
457 | /* check for stall/halt control bits */ | |
458 | /* XXX: What is the PEW bit for? */ | |
459 | if(PKE_REG_MASK_GET(me, STAT, PSS) || | |
460 | PKE_REG_MASK_GET(me, STAT, PFS) || | |
461 | /* maskable stall controls: ER0, ER1, PIS */ | |
462 | (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) || | |
463 | (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) || | |
464 | (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII))) | |
465 | { | |
466 | /* XXX */ | |
467 | } | |
468 | /* XXX: handle PSS by *skipping* instruction? */ | |
469 | ||
470 | /* confirm availability of new quadword of PKE instructions */ | |
471 | if(me->fifo_num_elements <= me->fifo_pc) | |
472 | return; | |
473 | ||
474 | ||
475 | /* 2 -- fetch PKE instruction */ | |
476 | ||
477 | /* "fetch" instruction quadword */ | |
478 | fqw = & me->fifo[me->fifo_pc]; | |
479 | ||
480 | /* skip over DMA tags, if present */ | |
481 | if((fqw->dma_tag_present != 0) && (me->qw_pc < 2)) | |
482 | { | |
483 | ASSERT(me->qw_pc == 0); | |
484 | /* XXX: check validity of DMA tag; if bad, set ER0 flag */ | |
485 | me->qw_pc = 2; | |
486 | } | |
487 | ||
488 | /* "fetch" instruction word */ | |
489 | fw = fqw->data[me->qw_pc]; | |
490 | ||
491 | /* store it in PKECODE register */ | |
492 | me->regs[PKE_REG_CODE][0] = fw; | |
493 | ||
494 | ||
495 | /* 3 -- decode PKE instruction */ | |
496 | ||
497 | /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0], | |
498 | so op-code is in top byte. */ | |
499 | intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E); | |
500 | cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); | |
501 | num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); | |
502 | imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
503 | ||
504 | if(intr) | |
505 | { | |
506 | /* set INT flag in STAT register */ | |
507 | PKE_REG_MASK_SET(me, STAT, INT, 1); | |
508 | /* XXX: send interrupt to R5900? */ | |
509 | } | |
510 | ||
511 | /* decoding */ | |
512 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); | |
513 | next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */ | |
514 | ||
515 | /* decode */ | |
516 | if(IS_PKE_CMD(cmd, PKENOP)) | |
517 | { | |
518 | /* no work required, yey */ | |
519 | pke_pc_advance(me, 1); | |
520 | } | |
521 | else if(IS_PKE_CMD(cmd, STCYCL)) | |
522 | { | |
523 | /* copy immediate value into CYCLE reg */ | |
524 | me->regs[PKE_REG_CYCLE][0] = imm; | |
525 | pke_pc_advance(me, 1); | |
526 | } | |
527 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET)) | |
528 | { | |
529 | /* copy 10 bits to OFFSET field */ | |
530 | PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); | |
531 | /* clear DBF bit */ | |
532 | PKE_REG_MASK_SET(me, DBF, DF, 0); | |
533 | /* clear other DBF bit */ | |
534 | PKE_REG_MASK_SET(me, STAT, DBF, 0); | |
535 | /* set TOPS = BASE */ | |
536 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
537 | PKE_REG_MASK_GET(me, BASE, BASE)); | |
538 | pke_pc_advance(me, 1); | |
539 | } | |
540 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE)) | |
541 | { | |
542 | /* copy 10 bits to BASE field */ | |
543 | PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); | |
544 | /* clear DBF bit */ | |
545 | PKE_REG_MASK_SET(me, DBF, DF, 0); | |
546 | /* clear other DBF bit */ | |
547 | PKE_REG_MASK_SET(me, STAT, DBF, 0); | |
548 | /* set TOPS = BASE */ | |
549 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
550 | PKE_REG_MASK_GET(me, BASE, BASE)); | |
551 | pke_pc_advance(me, 1); | |
552 | } | |
553 | else if(IS_PKE_CMD(cmd, ITOP)) | |
554 | { | |
555 | /* copy 10 bits to ITOPS field */ | |
556 | PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); | |
557 | pke_pc_advance(me, 1); | |
558 | } | |
559 | else if(IS_PKE_CMD(cmd, STMOD)) | |
560 | { | |
561 | /* copy 2 bits to MODE register */ | |
562 | PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); | |
563 | pke_pc_advance(me, 1); | |
564 | } | |
565 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */ | |
566 | { | |
567 | /* XXX: what to do with this? DMA control register? */ | |
568 | pke_pc_advance(me, 1); | |
569 | } | |
570 | else if(IS_PKE_CMD(cmd, PKEMARK)) | |
571 | { | |
572 | /* copy 16 bits to MARK register */ | |
573 | PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); | |
574 | /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ | |
575 | PKE_REG_MASK_SET(me, STAT, MRK, 1); | |
576 | pke_pc_advance(me, 1); | |
577 | } | |
578 | else if(IS_PKE_CMD(cmd, FLUSHE)) | |
579 | { | |
580 | /* read VU status word */ | |
581 | unsigned_4 vu_stat; | |
582 | sim_read(NULL, | |
583 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
584 | (void*) & vu_stat, | |
585 | sizeof(unsigned_4)); | |
586 | /* XXX: check RC */ | |
587 | ||
588 | /* check if VBS bit is clear, i.e., VU is idle */ | |
589 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) | |
590 | { | |
591 | /* VU idle */ | |
592 | /* advance PC */ | |
593 | pke_pc_advance(me, 1); | |
594 | } | |
595 | else | |
596 | { | |
597 | /* VU busy */ | |
598 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
599 | /* retry this instruction next clock */ | |
600 | } | |
601 | } | |
602 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH)) | |
603 | { | |
604 | /* read VU status word */ | |
605 | unsigned_4 vu_stat; | |
606 | sim_read(NULL, | |
607 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
608 | (void*) & vu_stat, | |
609 | sizeof(unsigned_4)); | |
610 | /* XXX: check RC */ | |
611 | ||
612 | /* check if VGW bit is clear, i.e., PATH1 is idle */ | |
613 | /* simulator design implies PATH2 is always "idle" */ | |
614 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && | |
615 | BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && | |
616 | 1 /* PATH2 always idle */) | |
617 | { | |
618 | /* VU idle */ | |
619 | /* PATH1 idle */ | |
620 | /* PATH2 idle */ | |
621 | /* advance PC */ | |
622 | pke_pc_advance(me, 1); | |
623 | } | |
624 | else | |
625 | { | |
626 | /* GPUIF busy */ | |
627 | /* retry this instruction next clock */ | |
628 | } | |
629 | } | |
630 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) | |
631 | { | |
632 | /* read VU status word */ | |
633 | unsigned_4 vu_stat; | |
634 | sim_read(NULL, | |
635 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
636 | (void*) & vu_stat, | |
637 | sizeof(unsigned_4)); | |
638 | /* XXX: check RC */ | |
639 | ||
640 | /* check if VGW bit is clear, i.e., PATH1 is idle */ | |
641 | /* simulator design implies PATH2 is always "idle" */ | |
642 | /* XXX: simulator design implies PATH3 is always "idle" */ | |
643 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && | |
644 | BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && | |
645 | 1 /* PATH2 always idle */ && | |
646 | 1 /* PATH3 always idle */) | |
647 | { | |
648 | /* VU idle */ | |
649 | /* PATH1 idle */ | |
650 | /* PATH2 idle */ | |
651 | /* PATH3 idle */ | |
652 | /* advance PC */ | |
653 | pke_pc_advance(me, 1); | |
654 | } | |
655 | else | |
656 | { | |
657 | /* GPUIF busy */ | |
658 | /* retry this instruction next clock */ | |
659 | } | |
660 | } | |
661 | else if(IS_PKE_CMD(cmd, PKEMSCAL)) | |
662 | { | |
663 | /* read VU status word */ | |
664 | unsigned_4 vu_stat; | |
665 | sim_read(NULL, | |
666 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
667 | (void*) & vu_stat, | |
668 | sizeof(unsigned_4)); | |
669 | /* XXX: check RC */ | |
670 | ||
671 | /* check if VBS bit is clear, i.e., VU is idle */ | |
672 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) | |
673 | { | |
674 | /* VU idle */ | |
675 | unsigned_4 vu_pc; | |
676 | ||
677 | /* perform PKE1-unique processing for microprogram calls */ | |
678 | if(me->pke_number == 1) | |
679 | { | |
680 | /* flip DBF */ | |
681 | PKE_REG_MASK_SET(me, DBF, DF, | |
682 | PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); | |
683 | PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); | |
684 | /* compute new TOPS */ | |
685 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
686 | (PKE_REG_MASK_GET(me, BASE, BASE) + | |
687 | (PKE_REG_MASK_GET(me, DBF, DF) * | |
688 | PKE_REG_MASK_GET(me, OFST, OFFSET)))); | |
689 | /* compute new ITOP and TOP */ | |
690 | PKE_REG_MASK_SET(me, ITOP, ITOP, | |
691 | PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
692 | PKE_REG_MASK_SET(me, TOP, TOP, | |
693 | PKE_REG_MASK_GET(me, TOPS, TOPS)); | |
694 | } | |
695 | ||
696 | /* compute new PC */ | |
697 | vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ | |
698 | /* write new PC; callback function gets VU running */ | |
699 | sim_write(NULL, | |
700 | (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), | |
701 | (void*) & vu_pc, | |
702 | sizeof(unsigned_4)); | |
703 | /* advance PC */ | |
704 | pke_pc_advance(me, 1); | |
705 | } | |
706 | else | |
707 | { | |
708 | /* VU busy */ | |
709 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
710 | /* retry this instruction next clock */ | |
711 | } | |
712 | } | |
713 | else if(IS_PKE_CMD(cmd, PKEMSCNT)) | |
714 | { | |
715 | /* read VU status word */ | |
716 | unsigned_4 vu_stat; | |
717 | sim_read(NULL, | |
718 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
719 | (void*) & vu_stat, | |
720 | sizeof(unsigned_4)); | |
721 | /* XXX: check RC */ | |
722 | ||
723 | /* check if VBS bit is clear, i.e., VU is idle */ | |
724 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) | |
725 | { | |
726 | /* VU idle */ | |
727 | unsigned_4 vu_pc; | |
728 | ||
729 | /* flip DBF etc. for PKE1 */ | |
730 | if(me->pke_number == 1) | |
731 | { | |
732 | PKE_REG_MASK_SET(me, DBF, DF, | |
733 | PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); | |
734 | PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); | |
735 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
736 | (PKE_REG_MASK_GET(me, BASE, BASE) + | |
737 | (PKE_REG_MASK_GET(me, DBF, DF) * | |
738 | PKE_REG_MASK_GET(me, OFST, OFFSET)))); | |
739 | PKE_REG_MASK_SET(me, ITOP, ITOP, | |
740 | PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
741 | PKE_REG_MASK_SET(me, TOP, TOP, | |
742 | PKE_REG_MASK_GET(me, TOPS, TOPS)); | |
743 | } | |
744 | ||
745 | /* read old PC */ | |
746 | sim_read(NULL, | |
747 | (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), | |
748 | (void*) & vu_pc, | |
749 | sizeof(unsigned_4)); | |
750 | /* rewrite its PC; callback function gets VU running */ | |
751 | sim_write(NULL, | |
752 | (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), | |
753 | (void*) & vu_pc, | |
754 | sizeof(unsigned_4)); | |
755 | /* advance PC */ | |
756 | pke_pc_advance(me, 1); | |
757 | } | |
758 | else | |
759 | { | |
760 | /* VU busy */ | |
761 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
762 | /* retry this instruction next clock */ | |
763 | } | |
764 | } | |
765 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) | |
766 | { | |
767 | /* read VU status word */ | |
768 | unsigned_4 vu_stat; | |
769 | sim_read(NULL, | |
770 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
771 | (void*) & vu_stat, | |
772 | sizeof(unsigned_4)); | |
773 | /* XXX: check RC */ | |
774 | ||
775 | /* check if VGW bit is clear, i.e., PATH1 is idle */ | |
776 | /* simulator design implies PATH2 is always "idle" */ | |
777 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && | |
778 | BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && | |
779 | 1 /* PATH2 always idle */) | |
780 | { | |
781 | /* VU idle */ | |
782 | /* PATH1 idle */ | |
783 | /* PATH2 idle */ | |
784 | unsigned_4 vu_pc; | |
785 | ||
786 | /* flip DBF etc. for PKE1 */ | |
787 | if(me->pke_number == 1) | |
788 | { | |
789 | PKE_REG_MASK_SET(me, DBF, DF, | |
790 | PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); | |
791 | PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); | |
792 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
793 | (PKE_REG_MASK_GET(me, BASE, BASE) + | |
794 | (PKE_REG_MASK_GET(me, DBF, DF) * | |
795 | PKE_REG_MASK_GET(me, OFST, OFFSET)))); | |
796 | PKE_REG_MASK_SET(me, ITOP, ITOP, | |
797 | PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
798 | PKE_REG_MASK_SET(me, TOP, TOP, | |
799 | PKE_REG_MASK_GET(me, TOPS, TOPS)); | |
800 | } | |
801 | ||
802 | /* compute new PC */ | |
803 | vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ | |
804 | /* write new PC; callback function gets VU running */ | |
805 | sim_write(NULL, | |
806 | (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), | |
807 | (void*) & vu_pc, | |
808 | sizeof(unsigned_4)); | |
809 | /* advance PC */ | |
810 | pke_pc_advance(me, 1); | |
811 | } | |
812 | else | |
813 | { | |
814 | /* VU busy */ | |
815 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
816 | /* retry this instruction next clock */ | |
817 | } | |
818 | } | |
819 | else if(IS_PKE_CMD(cmd, STMASK)) | |
820 | { | |
821 | /* check that FIFO has one more word for STMASK operand */ | |
822 | unsigned_4* mask; | |
823 | ||
824 | mask = pke_pc_operand(me, 1); | |
825 | if(mask != NULL) | |
826 | { | |
827 | /* "transferring" operand */ | |
828 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
829 | /* fill the register */ | |
830 | PKE_REG_MASK_SET(me, MASK, MASK, *mask); | |
831 | /* advance PC */ | |
832 | pke_pc_advance(me, 2); | |
833 | } | |
834 | else | |
835 | { | |
836 | /* need to wait for another word */ | |
837 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
838 | /* retry this instruction next clock */ | |
839 | } | |
840 | } | |
841 | else if(IS_PKE_CMD(cmd, STROW)) | |
842 | { | |
843 | /* check that FIFO has four more words for STROW operand */ | |
844 | unsigned_4* last_op; | |
845 | ||
846 | last_op = pke_pc_operand(me, 4); | |
847 | if(last_op != NULL) | |
848 | { | |
849 | /* "transferring" operand */ | |
850 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
851 | ||
852 | /* copy ROW registers: must all exist if 4th operand exists */ | |
853 | me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1); | |
854 | me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2); | |
855 | me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3); | |
856 | me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4); | |
857 | ||
858 | /* advance PC */ | |
859 | pke_pc_advance(me, 5); | |
860 | } | |
861 | else | |
862 | { | |
863 | /* need to wait for another word */ | |
864 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
865 | /* retry this instruction next clock */ | |
866 | } | |
867 | } | |
868 | else if(IS_PKE_CMD(cmd, STCOL)) | |
869 | { | |
870 | /* check that FIFO has four more words for STCOL operand */ | |
871 | unsigned_4* last_op; | |
872 | ||
873 | last_op = pke_pc_operand(me, 4); | |
874 | if(last_op != NULL) | |
875 | { | |
876 | /* "transferring" operand */ | |
877 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
878 | ||
879 | /* copy COL registers: must all exist if 4th operand exists */ | |
880 | me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1); | |
881 | me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2); | |
882 | me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3); | |
883 | me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4); | |
884 | ||
885 | /* advance PC */ | |
886 | pke_pc_advance(me, 5); | |
887 | } | |
888 | else | |
889 | { | |
890 | /* need to wait for another word */ | |
891 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
892 | /* retry this instruction next clock */ | |
893 | } | |
894 | } | |
895 | else if(IS_PKE_CMD(cmd, MPG)) | |
896 | { | |
897 | unsigned_4* last_mpg_word; | |
898 | ||
899 | /* map zero to max+1 */ | |
900 | if(num==0) num=0x100; | |
901 | ||
902 | /* check that FIFO has a few more words for MPG operand */ | |
903 | last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */ | |
904 | if(last_mpg_word != NULL) | |
905 | { | |
906 | /* perform implied FLUSHE */ | |
907 | /* read VU status word */ | |
908 | unsigned_4 vu_stat; | |
909 | sim_read(NULL, | |
910 | (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), | |
911 | (void*) & vu_stat, | |
912 | sizeof(unsigned_4)); | |
913 | /* XXX: check RC */ | |
914 | ||
915 | /* check if VBS bit is clear, i.e., VU is idle */ | |
916 | if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) | |
917 | { | |
918 | /* VU idle */ | |
919 | int i; | |
920 | ||
921 | /* "transferring" operand */ | |
922 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
923 | ||
924 | /* transfer VU instructions, one word per iteration */ | |
925 | for(i=0; i<num*2; i++) | |
926 | { | |
927 | address_word vu_addr_base, vu_addr; | |
928 | address_word vutrack_addr_base, vutrack_addr; | |
929 | struct fifo_quadword* fq = pke_pc_fifo(me, num); | |
930 | unsigned_4* operand = pke_pc_operand(me, num); | |
931 | ||
932 | /* imm: in 64-bit units for MPG instruction */ | |
933 | ||
934 | /* XXX: set NUM */ | |
935 | ||
936 | /* VU*_MEM0 : instruction memory */ | |
937 | vu_addr_base = (me->pke_number == 0) ? | |
938 | VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START; | |
939 | vu_addr = vu_addr_base + (imm*2) + i; | |
940 | ||
941 | /* VU*_MEM0_TRACK : source-addr tracking table */ | |
942 | vutrack_addr_base = (me->pke_number == 0) ? | |
943 | VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; | |
944 | vutrack_addr = vu_addr_base + (imm*2) + i; | |
945 | ||
946 | /* write data into VU memory */ | |
947 | pke_track_write(me, operand, sizeof(unsigned_4), | |
948 | vu_addr, fq->source_address); | |
949 | ||
950 | /* write srcaddr into VU srcaddr tracking table */ | |
951 | sim_write(NULL, | |
952 | (SIM_ADDR) vutrack_addr, | |
953 | (void*) & fq->source_address, | |
954 | sizeof(unsigned_4)); | |
955 | /* XXX: check RC */ | |
956 | } /* VU xfer loop */ | |
957 | ||
958 | /* advance PC */ | |
959 | pke_pc_advance(me, 1 + num*2); | |
960 | } | |
961 | else | |
962 | { | |
963 | /* VU busy */ | |
964 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
965 | /* retry this instruction next clock */ | |
966 | } | |
967 | } /* if FIFO full enough */ | |
968 | else | |
969 | { | |
970 | /* need to wait for another word */ | |
971 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
972 | /* retry this instruction next clock */ | |
973 | } | |
974 | } | |
975 | else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */ | |
976 | { | |
977 | /* check that FIFO has a few more words for DIRECT operand */ | |
978 | unsigned_4* last_direct_word; | |
979 | ||
980 | /* map zero to max+1 */ | |
981 | if(imm==0) imm=0x10000; | |
982 | ||
983 | last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */ | |
984 | if(last_direct_word != NULL) | |
985 | { | |
986 | /* VU idle */ | |
987 | int i; | |
988 | quadword fifo_data; | |
989 | ||
990 | /* "transferring" operand */ | |
991 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
992 | ||
993 | /* transfer GPUIF quadwords, one word per iteration */ | |
994 | for(i=0; i<imm*4; i++) | |
995 | { | |
996 | struct fifo_quadword* fq = pke_pc_fifo(me, num); | |
997 | unsigned_4* operand = pke_pc_operand(me, num); | |
998 | ||
999 | /* collect word into quadword */ | |
1000 | fifo_data[i%4] = *operand; | |
1001 | ||
1002 | /* write to GPUIF FIFO only with full word */ | |
1003 | if(i%4 == 3) | |
1004 | { | |
1005 | address_word gpuif_fifo = GPUIF_PATH2_FIFO_ADDR+(i/4); | |
1006 | pke_track_write(me, fifo_data, sizeof(quadword), | |
1007 | (SIM_ADDR) gpuif_fifo, fq->source_address); | |
1008 | /* XXX: check RC */ | |
1009 | } /* write collected quadword */ | |
1010 | ||
1011 | } /* GPUIF xfer loop */ | |
1012 | ||
1013 | /* advance PC */ | |
1014 | pke_pc_advance(me, 1 + imm*4); | |
1015 | } /* if FIFO full enough */ | |
1016 | else | |
1017 | { | |
1018 | /* need to wait for another word */ | |
1019 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
1020 | /* retry this instruction next clock */ | |
1021 | } | |
1022 | } | |
1023 | else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */ | |
1024 | { | |
1025 | short vn = BIT_MASK_GET(cmd, 2, 3); | |
1026 | short vl = BIT_MASK_GET(cmd, 0, 1); | |
1027 | short vnvl = BIT_MASK_GET(cmd, 0, 3); | |
1028 | int m = BIT_MASK_GET(cmd, 4, 4); | |
1029 | short cl = PKE_REG_MASK_GET(me, CYCLE, CL); | |
1030 | short wl = PKE_REG_MASK_GET(me, CYCLE, WL); | |
1031 | int n, num_operands; | |
1032 | unsigned_4* last_operand_word; | |
1033 | ||
1034 | /* map zero to max+1 */ | |
1035 | if(num==0) num=0x100; | |
1036 | ||
1037 | /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ | |
1038 | if(wl <= cl) | |
1039 | n = num; | |
1040 | else | |
1041 | n = cl * (num/wl) + PKE_LIMIT(num % wl, cl); | |
1042 | num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4)); | |
1043 | ||
1044 | /* confirm that FIFO has enough words in it */ | |
1045 | last_operand_word = pke_pc_operand(me, num_operands); | |
1046 | if(last_operand_word != NULL) | |
1047 | { | |
1048 | address_word vu_addr_base; | |
1049 | int operand_num, vector_num; | |
1050 | ||
1051 | /* "transferring" operand */ | |
1052 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1053 | ||
1054 | /* XXX: don't check whether VU is idle?? */ | |
1055 | ||
1056 | if(me->pke_number == 0) | |
1057 | vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); | |
1058 | else | |
1059 | { | |
1060 | vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); | |
1061 | if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */ | |
1062 | vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS); | |
1063 | } | |
1064 | ||
1065 | /* XXX: vu_addr overflow check */ | |
1066 | ||
1067 | /* transfer given number of vectors */ | |
1068 | operand_num = 1; /* word index into instruction stream: 1..num_operands */ | |
1069 | vector_num = 0; /* vector number being processed: 0..num-1 */ | |
1070 | while(operand_num <= num_operands) | |
1071 | { | |
1072 | quadword vu_old_data; | |
1073 | quadword vu_new_data; | |
1074 | quadword unpacked_data; | |
1075 | address_word vu_addr; | |
1076 | struct fifo_quadword* fq; | |
1077 | int i; | |
1078 | ||
1079 | /* XXX: set NUM */ | |
1080 | ||
1081 | /* compute VU destination address, as bytes in R5900 memory */ | |
1082 | if(cl >= wl) | |
1083 | { | |
1084 | /* map zero to max+1 */ | |
1085 | if(wl == 0) wl = 0x0100; | |
1086 | vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl)); | |
1087 | } | |
1088 | else | |
1089 | vu_addr = vu_addr_base + 16*vector_num; | |
1090 | ||
1091 | /* read old VU data word at address */ | |
1092 | sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data)); | |
1093 | ||
1094 | /* Let sourceaddr track the first operand */ | |
1095 | fq = pke_pc_fifo(me, operand_num); | |
1096 | ||
1097 | /* For cyclic unpack, next operand quadword may come from instruction stream | |
1098 | or be zero. */ | |
1099 | if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */ | |
1100 | { | |
1101 | /* clear operand - used only in a "indeterminate" state */ | |
1102 | for(i = 0; i < 4; i++) | |
1103 | unpacked_data[i] = 0; | |
1104 | } | |
1105 | else | |
1106 | { | |
1107 | /* compute unpacked words from instruction stream */ | |
1108 | switch(vnvl) | |
1109 | { | |
1110 | case PKE_UNPACK_S_32: | |
1111 | case PKE_UNPACK_V2_32: | |
1112 | case PKE_UNPACK_V3_32: | |
1113 | case PKE_UNPACK_V4_32: | |
1114 | /* copy (vn+1) 32-bit values */ | |
1115 | for(i = 0; i < vn+1; i++) | |
1116 | { | |
1117 | unsigned_4* operand = pke_pc_operand(me, operand_num); | |
1118 | unpacked_data[i] = *operand; | |
1119 | operand_num ++; | |
1120 | } | |
1121 | break; | |
1122 | ||
1123 | case PKE_UNPACK_S_16: | |
1124 | case PKE_UNPACK_V2_16: | |
1125 | case PKE_UNPACK_V3_16: | |
1126 | case PKE_UNPACK_V4_16: | |
1127 | /* copy (vn+1) 16-bit values, packed two-per-word */ | |
1128 | for(i=0; i<vn+1; i+=2) | |
1129 | { | |
1130 | unsigned_4* operand = pke_pc_operand(me, operand_num); | |
1131 | unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 15, 31); | |
1132 | unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 16, 31, 31); | |
1133 | operand_num ++; | |
1134 | } | |
1135 | break; | |
1136 | ||
1137 | case PKE_UNPACK_S_8: | |
1138 | case PKE_UNPACK_V2_8: | |
1139 | case PKE_UNPACK_V3_8: | |
1140 | case PKE_UNPACK_V4_8: | |
1141 | /* copy (vn+1) 8-bit values, packed four-per-word */ | |
1142 | for(i=0; i<vn+1; i+=4) | |
1143 | { | |
1144 | unsigned_4* operand = pke_pc_operand(me, operand_num); | |
1145 | unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 7, 31); | |
1146 | unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 8, 15, 31); | |
1147 | unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 16, 23, 31); | |
1148 | unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 24, 31, 31); | |
1149 | operand_num ++; | |
1150 | } | |
1151 | break; | |
1152 | ||
1153 | case PKE_UNPACK_V4_5: | |
1154 | /* copy four 1/5/5/5-bit values, packed into a sixteen-bit */ | |
1155 | for(i=0; i<vn+1; i+=4) | |
1156 | { | |
1157 | unsigned_4* operand = pke_pc_operand(me, operand_num); | |
1158 | unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 4, 31); | |
1159 | unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 5, 9, 31); | |
1160 | unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 10, 14, 31); | |
1161 | unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 15, 15, 31); | |
1162 | /* ignore other 16 bits in operand */ | |
1163 | operand_num ++; | |
1164 | } | |
1165 | break; | |
1166 | ||
1167 | default: /* bad UNPACK code */ | |
1168 | { | |
1169 | /* XXX: how to handle? */ | |
1170 | /* set ER1 flag in STAT register */ | |
1171 | PKE_REG_MASK_SET(me, STAT, ER1, 1); | |
1172 | } | |
1173 | } | |
1174 | } | |
1175 | ||
1176 | /* compute replacement word - function of vn, vl, mask */ | |
1177 | if(m) /* use mask register? */ | |
1178 | { | |
1179 | /* compute index into mask register for this word */ | |
1180 | int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */ | |
1181 | ||
1182 | for(i=0; i<3; i++) /* loop over columns */ | |
1183 | { | |
1184 | int mask_op = PKE_MASKREG_GET(me, mask_index, i); | |
1185 | unsigned_4* masked_value = NULL; | |
1186 | unsigned_4 zero = 0; | |
1187 | ||
1188 | switch(mask_op) | |
1189 | { | |
1190 | case PKE_MASKREG_INPUT: | |
1191 | /* for vn == 0, all columns are copied from column 0 */ | |
1192 | if(vn == 0) | |
1193 | masked_value = & unpacked_data[0]; | |
1194 | else if(i > vn) | |
1195 | masked_value = & zero; /* XXX: what to put here? */ | |
1196 | else | |
1197 | masked_value = & unpacked_data[i]; | |
1198 | break; | |
1199 | ||
1200 | case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ | |
1201 | masked_value = & me->regs[PKE_REG_R0 + i][0]; | |
1202 | break; | |
1203 | ||
1204 | case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ | |
1205 | masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0]; | |
1206 | break; | |
1207 | ||
1208 | case PKE_MASKREG_NOTHING: | |
1209 | /* "write inhibit" by re-copying old data */ | |
1210 | masked_value = & vu_old_data[i]; | |
1211 | break; | |
1212 | ||
1213 | default: | |
1214 | ASSERT(0); | |
1215 | /* no other cases possible */ | |
1216 | } | |
1217 | ||
1218 | /* copy masked value for column */ | |
1219 | memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4)); | |
1220 | } /* loop over columns */ | |
1221 | } | |
1222 | else | |
1223 | { | |
1224 | /* no mask - just copy over entire unpacked quadword */ | |
1225 | memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); | |
1226 | } | |
1227 | ||
1228 | /* process STMOD register for accumulation operations */ | |
1229 | switch(PKE_REG_MASK_GET(me, MODE, MDE)) | |
1230 | { | |
1231 | case PKE_MODE_ADDROW: /* add row registers to output data */ | |
1232 | for(i=0; i<4; i++) | |
1233 | /* exploit R0..R3 contiguity */ | |
1234 | vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; | |
1235 | break; | |
1236 | ||
1237 | case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */ | |
1238 | for(i=0; i<4; i++) | |
1239 | { | |
1240 | /* exploit R0..R3 contiguity */ | |
1241 | vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; | |
1242 | me->regs[PKE_REG_R0 + i][0] = vu_new_data[i]; | |
1243 | } | |
1244 | break; | |
1245 | ||
1246 | case PKE_MODE_INPUT: /* pass data through */ | |
1247 | default: | |
1248 | ; | |
1249 | } | |
1250 | ||
1251 | /* write replacement word */ | |
1252 | pke_track_write(me, vu_new_data, sizeof(vu_new_data), | |
1253 | (SIM_ADDR) vu_addr, fq->source_address); | |
1254 | ||
1255 | /* next vector please */ | |
1256 | vector_num ++; | |
1257 | } /* vector transfer loop */ | |
1258 | } /* PKE FIFO full enough */ | |
1259 | else | |
1260 | { | |
1261 | /* need to wait for another word */ | |
1262 | next_pps_state = PKE_REG_STAT_PPS_WAIT; | |
1263 | /* retry this instruction next clock */ | |
1264 | } | |
1265 | } | |
1266 | /* ... */ | |
1267 | else | |
1268 | { | |
1269 | /* set ER1 flag in STAT register */ | |
1270 | PKE_REG_MASK_SET(me, STAT, ER1, 1); | |
1271 | /* advance over faulty word */ | |
1272 | pke_pc_advance(me, 1); | |
1273 | } | |
1274 | ||
1275 | /* PKE is now idle or waiting */ | |
1276 | PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state); | |
1277 | } | |
1278 | ||
1279 | ||
1280 | ||
1281 | ||
1282 | ||
1283 | ||
1284 | /* advance the PC by given number of words; update STAT/FQC field */ | |
1285 | ||
1286 | void | |
1287 | pke_pc_advance(struct pke_device* me, int num_words) | |
1288 | { | |
1289 | ASSERT(num_words > 0); | |
1290 | ||
1291 | me->qw_pc += num_words; | |
1292 | /* handle overflow */ | |
1293 | while(me->qw_pc >= 4) | |
1294 | { | |
1295 | me->qw_pc -= 4; | |
1296 | me->fifo_pc ++; | |
1297 | } | |
1298 | ||
1299 | /* clear FQC if FIFO is now empty */ | |
1300 | if(me->fifo_num_elements == me->fifo_pc) | |
1301 | { | |
1302 | PKE_REG_MASK_SET(me, STAT, FQC, 0); | |
1303 | } | |
1304 | ||
1305 | } | |
1306 | ||
1307 | ||
1308 | ||
1309 | /* Return pointer to given operand# in FIFO. `word_num' starts at 1. | |
1310 | If FIFO is not full enough, return 0. */ | |
1311 | ||
1312 | unsigned_4* | |
1313 | pke_pc_operand(struct pke_device* me, int word_num) | |
1314 | { | |
52793fab IC |
1315 | int new_qw_pc = 0; |
1316 | int new_fifo_pc; | |
fba9bfed FCE |
1317 | unsigned_4* operand; |
1318 | ||
1319 | ASSERT(word_num > 0); | |
1320 | ||
1321 | new_fifo_pc = me->fifo_pc; | |
1322 | new_qw_pc += me->qw_pc + word_num; | |
1323 | ||
1324 | /* handle overflow */ | |
1325 | while(new_qw_pc >= 4) | |
1326 | { | |
1327 | new_qw_pc -= 4; | |
1328 | new_fifo_pc ++; | |
1329 | } | |
1330 | ||
1331 | /* not enough elements */ | |
1332 | if(me->fifo_num_elements == me->fifo_pc) | |
1333 | operand = NULL; | |
1334 | else | |
1335 | operand = & me->fifo[new_fifo_pc].data[new_qw_pc]; | |
1336 | ||
1337 | return operand; | |
1338 | } | |
1339 | ||
1340 | ||
1341 | ||
1342 | /* Return pointer to FIFO quadword containing given operand# in FIFO. | |
1343 | `word_num' starts at 1. If FIFO is not full enough, return 0. */ | |
1344 | ||
1345 | struct fifo_quadword* | |
1346 | pke_pc_fifo(struct pke_device* me, int word_num) | |
1347 | { | |
52793fab IC |
1348 | int new_qw_pc = 0; |
1349 | int new_fifo_pc; | |
fba9bfed | 1350 | struct fifo_quadword* operand; |
aea481da | 1351 | |
fba9bfed | 1352 | ASSERT(word_num > 0); |
aea481da | 1353 | |
fba9bfed FCE |
1354 | new_fifo_pc = me->fifo_pc; |
1355 | new_qw_pc += me->qw_pc + word_num; | |
1356 | ||
1357 | /* handle overflow */ | |
1358 | while(new_qw_pc >= 4) | |
1359 | { | |
1360 | new_qw_pc -= 4; | |
1361 | new_fifo_pc ++; | |
1362 | } | |
1363 | ||
1364 | /* not enough elements */ | |
1365 | if(me->fifo_num_elements == me->fifo_pc) | |
1366 | operand = NULL; | |
1367 | else | |
1368 | operand = & me->fifo[new_fifo_pc]; | |
1369 | ||
1370 | return operand; | |
aea481da DE |
1371 | } |
1372 | ||
1373 | ||
fba9bfed FCE |
1374 | |
1375 | /* Write a bunch of bytes into simulator memory. Store the given source address into the | |
1376 | PKE sourceaddr tracking word. */ | |
1377 | int | |
1378 | pke_track_write(struct pke_device* me, const void* src, int len, | |
1379 | address_word dest, unsigned_4 sourceaddr) | |
1380 | { | |
1381 | int rc; | |
1382 | unsigned_4 no_sourceaddr = 0; | |
1383 | ||
1384 | /* write srcaddr into PKE srcaddr tracking */ | |
1385 | sim_write(NULL, | |
1386 | (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, | |
1387 | (void*) & sourceaddr, | |
1388 | sizeof(unsigned_4)); | |
1389 | ||
1390 | /* write bytes into simulator */ | |
1391 | rc = sim_write(NULL, | |
1392 | (SIM_ADDR) dest, | |
1393 | (void*) src, | |
1394 | len); | |
1395 | ||
1396 | /* clear srcaddr from PKE srcaddr tracking */ | |
1397 | sim_write(NULL, | |
1398 | (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, | |
1399 | (void*) & no_sourceaddr, | |
1400 | sizeof(unsigned_4)); | |
1401 | ||
1402 | return rc; | |
1403 | } |