Commit | Line | Data |
---|---|---|
aea481da DE |
1 | /* Copyright (C) 1998, Cygnus Solutions */ |
2 | ||
e2306992 | 3 | |
10572b6a AC |
4 | #include "config.h" |
5 | ||
aea481da | 6 | #include <stdlib.h> |
15232df4 | 7 | #include "sim-main.h" |
db6dac32 | 8 | #include "sim-bits.h" |
fba9bfed | 9 | #include "sim-assert.h" |
15232df4 FCE |
10 | #include "sky-pke.h" |
11 | #include "sky-dma.h" | |
12 | #include "sky-vu.h" | |
fba9bfed | 13 | #include "sky-gpuif.h" |
fd909089 | 14 | #include "sky-device.h" |
aea481da | 15 | |
15232df4 | 16 | |
10572b6a AC |
17 | #ifdef HAVE_STRING_H |
18 | #include <string.h> | |
19 | #else | |
20 | #ifdef HAVE_STRINGS_H | |
21 | #include <strings.h> | |
22 | #endif | |
23 | #endif | |
24 | ||
aea481da DE |
25 | |
26 | /* Internal function declarations */ | |
27 | ||
28 | static int pke_io_read_buffer(device*, void*, int, address_word, | |
29 | unsigned, sim_cpu*, sim_cia); | |
30 | static int pke_io_write_buffer(device*, const void*, int, address_word, | |
31 | unsigned, sim_cpu*, sim_cia); | |
9614fb3c | 32 | static void pke_reset(struct pke_device*); |
e2306992 | 33 | static void pke_issue(SIM_DESC, struct pke_device*); |
fba9bfed | 34 | static void pke_pc_advance(struct pke_device*, int num_words); |
9614fb3c FCE |
35 | static struct fifo_quadword* pke_pcrel_fifo(struct pke_device*, int operand_num, |
36 | unsigned_4** operand); | |
37 | static unsigned_4* pke_pcrel_operand(struct pke_device*, int operand_num); | |
38 | static unsigned_4 pke_pcrel_operand_bits(struct pke_device*, int bit_offset, | |
39 | int bit_width, unsigned_4* sourceaddr); | |
fba9bfed | 40 | static void pke_attach(SIM_DESC sd, struct pke_device* me); |
43a6998b FCE |
41 | enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 }; |
42 | static int pke_check_stall(struct pke_device* me, enum pke_check_target what); | |
43 | static void pke_flip_dbf(struct pke_device* me); | |
9614fb3c | 44 | static void pke_begin_interrupt_stall(struct pke_device* me); |
43a6998b FCE |
45 | /* PKEcode handlers */ |
46 | static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode); | |
47 | static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode); | |
48 | static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode); | |
49 | static void pke_code_base(struct pke_device* me, unsigned_4 pkecode); | |
50 | static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode); | |
51 | static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode); | |
52 | static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode); | |
53 | static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode); | |
54 | static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode); | |
55 | static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode); | |
56 | static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode); | |
57 | static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode); | |
58 | static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode); | |
59 | static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode); | |
60 | static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode); | |
61 | static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode); | |
62 | static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode); | |
63 | static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode); | |
64 | static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode); | |
65 | static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode); | |
66 | static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode); | |
67 | static void pke_code_error(struct pke_device* me, unsigned_4 pkecode); | |
15232df4 FCE |
68 | unsigned_4 pke_fifo_flush(struct pke_fifo*); |
69 | void pke_fifo_reset(struct pke_fifo*); | |
70 | struct fifo_quadword* pke_fifo_fit(struct pke_fifo*); | |
71 | struct fifo_quadword* pke_fifo_access(struct pke_fifo*, unsigned_4 qwnum); | |
72 | void pke_fifo_old(struct pke_fifo*, unsigned_4 qwnum); | |
fba9bfed | 73 | |
aea481da DE |
74 | |
75 | ||
76 | /* Static data */ | |
77 | ||
78 | struct pke_device pke0_device = | |
79 | { | |
80 | { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ | |
81 | 0, 0, /* ID, flags */ | |
aea481da | 82 | {}, /* regs */ |
db6dac32 | 83 | {}, 0, /* FIFO write buffer */ |
9614fb3c FCE |
84 | { NULL, 0, 0, 0 }, /* FIFO */ |
85 | NULL, /* FIFO trace file */ | |
121d6745 | 86 | -1, -1, 0, 0, 0, /* invalid FIFO cache */ |
fba9bfed | 87 | 0, 0 /* pc */ |
aea481da DE |
88 | }; |
89 | ||
90 | ||
91 | struct pke_device pke1_device = | |
92 | { | |
93 | { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ | |
94 | 1, 0, /* ID, flags */ | |
aea481da | 95 | {}, /* regs */ |
db6dac32 | 96 | {}, 0, /* FIFO write buffer */ |
9614fb3c FCE |
97 | { NULL, 0, 0, 0 }, /* FIFO */ |
98 | NULL, /* FIFO trace file */ | |
121d6745 | 99 | -1, -1, 0, 0, 0, /* invalid FIFO cache */ |
fba9bfed | 100 | 0, 0 /* pc */ |
aea481da DE |
101 | }; |
102 | ||
103 | ||
104 | ||
105 | /* External functions */ | |
106 | ||
107 | ||
fba9bfed | 108 | /* Attach PKE addresses to main memory */ |
aea481da DE |
109 | |
110 | void | |
111 | pke0_attach(SIM_DESC sd) | |
112 | { | |
fba9bfed | 113 | pke_attach(sd, & pke0_device); |
9614fb3c | 114 | pke_reset(& pke0_device); |
fba9bfed | 115 | } |
aea481da | 116 | |
fba9bfed FCE |
117 | void |
118 | pke1_attach(SIM_DESC sd) | |
119 | { | |
120 | pke_attach(sd, & pke1_device); | |
9614fb3c | 121 | pke_reset(& pke1_device); |
aea481da DE |
122 | } |
123 | ||
124 | ||
fba9bfed FCE |
125 | |
126 | /* Issue a PKE instruction if possible */ | |
aea481da DE |
127 | |
128 | void | |
e2306992 | 129 | pke0_issue(SIM_DESC sd) |
aea481da | 130 | { |
e2306992 | 131 | pke_issue(sd, & pke0_device); |
fba9bfed FCE |
132 | } |
133 | ||
134 | void | |
e2306992 | 135 | pke1_issue(SIM_DESC sd) |
fba9bfed | 136 | { |
653c2590 | 137 | pke_issue(sd, & pke1_device); |
fba9bfed FCE |
138 | } |
139 | ||
140 | ||
141 | ||
142 | /* Internal functions */ | |
143 | ||
144 | ||
145 | /* Attach PKE memory regions to simulator */ | |
146 | ||
147 | void | |
148 | pke_attach(SIM_DESC sd, struct pke_device* me) | |
149 | { | |
150 | /* register file */ | |
e2306992 | 151 | sim_core_attach (sd, NULL, 0, access_read_write, 0, |
fba9bfed | 152 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START, |
aea481da DE |
153 | PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, |
154 | 0 /*modulo*/, | |
e2306992 | 155 | (device*) me, |
aea481da DE |
156 | NULL /*buffer*/); |
157 | ||
fba9bfed | 158 | /* FIFO port */ |
e2306992 | 159 | sim_core_attach (sd, NULL, 0, access_read_write, 0, |
fba9bfed | 160 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR, |
aea481da DE |
161 | sizeof(quadword) /*nr_bytes*/, |
162 | 0 /*modulo*/, | |
e2306992 | 163 | (device*) me, |
aea481da | 164 | NULL /*buffer*/); |
aea481da | 165 | |
534a3d5c | 166 | /* VU MEM0 tracking table */ |
e2306992 | 167 | sim_core_attach (sd, NULL, 0, access_read_write, 0, |
534a3d5c FCE |
168 | ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START), |
169 | ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2, | |
fba9bfed | 170 | 0 /*modulo*/, |
534a3d5c FCE |
171 | NULL, |
172 | NULL /*buffer*/); | |
173 | ||
174 | /* VU MEM1 tracking table */ | |
175 | sim_core_attach (sd, NULL, 0, access_read_write, 0, | |
176 | ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START), | |
177 | ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4, | |
178 | 0 /*modulo*/, | |
179 | NULL, | |
180 | NULL /*buffer*/); | |
181 | ||
e2306992 FCE |
182 | |
183 | /* attach to trace file if appropriate */ | |
184 | { | |
185 | char trace_envvar[80]; | |
186 | char* trace_filename = NULL; | |
187 | sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number); | |
188 | trace_filename = getenv(trace_envvar); | |
189 | if(trace_filename != NULL) | |
190 | { | |
191 | me->fifo_trace_file = fopen(trace_filename, "w"); | |
192 | if(me->fifo_trace_file == NULL) | |
e2306992 | 193 | perror("VIF FIFO trace error on fopen"); |
b4d2f483 FCE |
194 | else |
195 | setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0); | |
e2306992 FCE |
196 | } |
197 | } | |
aea481da DE |
198 | } |
199 | ||
200 | ||
201 | ||
aea481da DE |
202 | /* Handle a PKE read; return no. of bytes read */ |
203 | ||
204 | int | |
205 | pke_io_read_buffer(device *me_, | |
206 | void *dest, | |
207 | int space, | |
208 | address_word addr, | |
209 | unsigned nr_bytes, | |
fba9bfed | 210 | sim_cpu *cpu, |
aea481da DE |
211 | sim_cia cia) |
212 | { | |
213 | /* downcast to gather embedding pke_device struct */ | |
214 | struct pke_device* me = (struct pke_device*) me_; | |
215 | ||
fba9bfed FCE |
216 | /* find my address ranges */ |
217 | address_word my_reg_start = | |
218 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; | |
219 | address_word my_fifo_addr = | |
220 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; | |
221 | ||
aea481da DE |
222 | /* enforce that an access does not span more than one quadword */ |
223 | address_word low = ADDR_TRUNC_QW(addr); | |
224 | address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); | |
225 | if(low != high) | |
226 | return 0; | |
227 | ||
228 | /* classify address & handle */ | |
fba9bfed | 229 | if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) |
aea481da DE |
230 | { |
231 | /* register bank */ | |
fba9bfed FCE |
232 | int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; |
233 | int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ | |
aea481da | 234 | int readable = 1; |
fba9bfed FCE |
235 | quadword result; |
236 | ||
237 | /* clear result */ | |
238 | result[0] = result[1] = result[2] = result[3] = 0; | |
aea481da | 239 | |
fba9bfed | 240 | /* handle reads to individual registers; clear `readable' on error */ |
aea481da DE |
241 | switch(reg_num) |
242 | { | |
fba9bfed FCE |
243 | /* handle common case of register reading, side-effect free */ |
244 | /* PKE1-only registers*/ | |
aea481da DE |
245 | case PKE_REG_BASE: |
246 | case PKE_REG_OFST: | |
247 | case PKE_REG_TOPS: | |
248 | case PKE_REG_TOP: | |
249 | case PKE_REG_DBF: | |
fba9bfed | 250 | if(me->pke_number == 0) |
aea481da | 251 | readable = 0; |
fba9bfed FCE |
252 | /* fall through */ |
253 | /* PKE0 & PKE1 common registers*/ | |
254 | case PKE_REG_STAT: | |
255 | case PKE_REG_ERR: | |
256 | case PKE_REG_MARK: | |
257 | case PKE_REG_CYCLE: | |
258 | case PKE_REG_MODE: | |
259 | case PKE_REG_NUM: | |
260 | case PKE_REG_MASK: | |
261 | case PKE_REG_CODE: | |
262 | case PKE_REG_ITOPS: | |
263 | case PKE_REG_ITOP: | |
264 | case PKE_REG_R0: | |
265 | case PKE_REG_R1: | |
266 | case PKE_REG_R2: | |
267 | case PKE_REG_R3: | |
268 | case PKE_REG_C0: | |
269 | case PKE_REG_C1: | |
270 | case PKE_REG_C2: | |
271 | case PKE_REG_C3: | |
f0bb94cd | 272 | result[0] = H2T_4(me->regs[reg_num][0]); |
fba9bfed FCE |
273 | break; |
274 | ||
275 | /* handle common case of write-only registers */ | |
276 | case PKE_REG_FBRST: | |
277 | readable = 0; | |
278 | break; | |
279 | ||
280 | default: | |
281 | ASSERT(0); /* test above should prevent this possibility */ | |
aea481da DE |
282 | } |
283 | ||
fba9bfed | 284 | /* perform transfer & return */ |
aea481da DE |
285 | if(readable) |
286 | { | |
aea481da | 287 | /* copy the bits */ |
fba9bfed | 288 | memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); |
aea481da | 289 | /* okay */ |
aea481da DE |
290 | } |
291 | else | |
292 | { | |
b4d2f483 FCE |
293 | /* return zero bits */ |
294 | memset(dest, 0, nr_bytes); | |
aea481da DE |
295 | } |
296 | ||
b4d2f483 | 297 | return nr_bytes; |
aea481da DE |
298 | /* NOTREACHED */ |
299 | } | |
fba9bfed FCE |
300 | else if(addr >= my_fifo_addr && |
301 | addr < my_fifo_addr + sizeof(quadword)) | |
aea481da DE |
302 | { |
303 | /* FIFO */ | |
304 | ||
fba9bfed FCE |
305 | /* FIFO is not readable: return a word of zeroes */ |
306 | memset(dest, 0, nr_bytes); | |
307 | return nr_bytes; | |
aea481da DE |
308 | } |
309 | ||
310 | /* NOTREACHED */ | |
fba9bfed | 311 | return 0; |
aea481da DE |
312 | } |
313 | ||
314 | ||
315 | /* Handle a PKE read; return no. of bytes written */ | |
316 | ||
317 | int | |
318 | pke_io_write_buffer(device *me_, | |
319 | const void *src, | |
320 | int space, | |
321 | address_word addr, | |
322 | unsigned nr_bytes, | |
fba9bfed | 323 | sim_cpu *cpu, |
aea481da DE |
324 | sim_cia cia) |
325 | { | |
326 | /* downcast to gather embedding pke_device struct */ | |
327 | struct pke_device* me = (struct pke_device*) me_; | |
328 | ||
fba9bfed FCE |
329 | /* find my address ranges */ |
330 | address_word my_reg_start = | |
331 | (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; | |
332 | address_word my_fifo_addr = | |
333 | (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; | |
334 | ||
aea481da DE |
335 | /* enforce that an access does not span more than one quadword */ |
336 | address_word low = ADDR_TRUNC_QW(addr); | |
337 | address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); | |
338 | if(low != high) | |
339 | return 0; | |
340 | ||
341 | /* classify address & handle */ | |
fba9bfed | 342 | if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) |
aea481da DE |
343 | { |
344 | /* register bank */ | |
fba9bfed FCE |
345 | int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; |
346 | int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ | |
aea481da | 347 | int writeable = 1; |
fba9bfed FCE |
348 | quadword input; |
349 | ||
350 | /* clear input */ | |
351 | input[0] = input[1] = input[2] = input[3] = 0; | |
aea481da | 352 | |
fba9bfed FCE |
353 | /* write user-given bytes into input */ |
354 | memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); | |
355 | ||
f0bb94cd FCE |
356 | /* make words host-endian */ |
357 | input[0] = T2H_4(input[0]); | |
358 | /* we may ignore other words */ | |
359 | ||
fba9bfed | 360 | /* handle writes to individual registers; clear `writeable' on error */ |
aea481da DE |
361 | switch(reg_num) |
362 | { | |
fba9bfed | 363 | case PKE_REG_FBRST: |
43a6998b FCE |
364 | /* Order these tests from least to most overriding, in case |
365 | multiple bits are set. */ | |
b4d2f483 | 366 | if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E)) |
fba9bfed FCE |
367 | { |
368 | /* clear a bunch of status bits */ | |
369 | PKE_REG_MASK_SET(me, STAT, PSS, 0); | |
370 | PKE_REG_MASK_SET(me, STAT, PFS, 0); | |
371 | PKE_REG_MASK_SET(me, STAT, PIS, 0); | |
372 | PKE_REG_MASK_SET(me, STAT, INT, 0); | |
373 | PKE_REG_MASK_SET(me, STAT, ER0, 0); | |
374 | PKE_REG_MASK_SET(me, STAT, ER1, 0); | |
db6dac32 | 375 | me->flags &= ~PKE_FLAG_PENDING_PSS; |
fba9bfed FCE |
376 | /* will allow resumption of possible stalled instruction */ |
377 | } | |
b4d2f483 | 378 | if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E)) |
43a6998b | 379 | { |
db6dac32 | 380 | me->flags |= PKE_FLAG_PENDING_PSS; |
43a6998b | 381 | } |
b4d2f483 | 382 | if(BIT_MASK_GET(input[0], PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E)) |
43a6998b FCE |
383 | { |
384 | PKE_REG_MASK_SET(me, STAT, PFS, 1); | |
385 | } | |
b4d2f483 | 386 | if(BIT_MASK_GET(input[0], PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E)) |
43a6998b | 387 | { |
9614fb3c | 388 | pke_reset(me); |
43a6998b | 389 | } |
fba9bfed FCE |
390 | break; |
391 | ||
392 | case PKE_REG_ERR: | |
393 | /* copy bottom three bits */ | |
394 | BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2)); | |
395 | break; | |
396 | ||
397 | case PKE_REG_MARK: | |
398 | /* copy bottom sixteen bits */ | |
399 | PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15)); | |
400 | /* reset MRK bit in STAT */ | |
401 | PKE_REG_MASK_SET(me, STAT, MRK, 0); | |
402 | break; | |
403 | ||
404 | /* handle common case of read-only registers */ | |
405 | /* PKE1-only registers - not really necessary to handle separately */ | |
aea481da DE |
406 | case PKE_REG_BASE: |
407 | case PKE_REG_OFST: | |
408 | case PKE_REG_TOPS: | |
409 | case PKE_REG_TOP: | |
410 | case PKE_REG_DBF: | |
fba9bfed | 411 | if(me->pke_number == 0) |
aea481da | 412 | writeable = 0; |
fba9bfed FCE |
413 | /* fall through */ |
414 | /* PKE0 & PKE1 common registers*/ | |
415 | case PKE_REG_STAT: | |
416 | /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ | |
417 | case PKE_REG_CYCLE: | |
418 | case PKE_REG_MODE: | |
419 | case PKE_REG_NUM: | |
420 | case PKE_REG_MASK: | |
421 | case PKE_REG_CODE: | |
422 | case PKE_REG_ITOPS: | |
423 | case PKE_REG_ITOP: | |
424 | case PKE_REG_R0: | |
425 | case PKE_REG_R1: | |
426 | case PKE_REG_R2: | |
427 | case PKE_REG_R3: | |
428 | case PKE_REG_C0: | |
429 | case PKE_REG_C1: | |
430 | case PKE_REG_C2: | |
431 | case PKE_REG_C3: | |
432 | writeable = 0; | |
433 | break; | |
434 | ||
435 | default: | |
436 | ASSERT(0); /* test above should prevent this possibility */ | |
aea481da DE |
437 | } |
438 | ||
fba9bfed | 439 | /* perform return */ |
b4d2f483 | 440 | if(! writeable) |
aea481da | 441 | { |
b4d2f483 | 442 | ; /* error */ |
aea481da DE |
443 | } |
444 | ||
b4d2f483 FCE |
445 | return nr_bytes; |
446 | ||
aea481da DE |
447 | /* NOTREACHED */ |
448 | } | |
fba9bfed FCE |
449 | else if(addr >= my_fifo_addr && |
450 | addr < my_fifo_addr + sizeof(quadword)) | |
aea481da DE |
451 | { |
452 | /* FIFO */ | |
fba9bfed | 453 | struct fifo_quadword* fqw; |
db6dac32 | 454 | int fifo_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside fifo quadword */ |
e2306992 | 455 | unsigned_4 dma_tag_present = 0; |
db6dac32 FCE |
456 | int i; |
457 | ||
f0bb94cd | 458 | /* collect potentially-partial quadword in write buffer; LE byte order */ |
db6dac32 FCE |
459 | memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes); |
460 | /* mark bytes written */ | |
461 | for(i = fifo_byte; i < fifo_byte + nr_bytes; i++) | |
462 | BIT_MASK_SET(me->fifo_qw_done, i, i, 1); | |
463 | ||
464 | /* return if quadword not quite written yet */ | |
465 | if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) != | |
e2306992 | 466 | BIT_MASK_BTW(0, sizeof(quadword)-1)) |
db6dac32 | 467 | return nr_bytes; |
aea481da | 468 | |
db6dac32 FCE |
469 | /* all done - process quadword after clearing flag */ |
470 | BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0); | |
aea481da | 471 | |
9614fb3c FCE |
472 | /* allocate required address in FIFO */ |
473 | fqw = pke_fifo_fit(& me->fifo); | |
474 | ASSERT(fqw != NULL); | |
aea481da | 475 | |
9614fb3c | 476 | /* fill in unclassified FIFO quadword data in host byte order */ |
e2306992 FCE |
477 | fqw->word_class[0] = fqw->word_class[1] = |
478 | fqw->word_class[2] = fqw->word_class[3] = wc_unknown; | |
f0bb94cd FCE |
479 | fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]); |
480 | fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]); | |
481 | fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]); | |
482 | fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]); | |
9614fb3c FCE |
483 | |
484 | /* read DMAC-supplied indicators */ | |
e2306992 | 485 | ASSERT(sizeof(unsigned_4) == 4); |
534a3d5c | 486 | PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR), |
f0bb94cd | 487 | & fqw->source_address, /* converted to host-endian */ |
e2306992 | 488 | 4); |
534a3d5c | 489 | PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG), |
e2306992 FCE |
490 | & dma_tag_present, |
491 | 4); | |
492 | ||
493 | if(dma_tag_present) | |
494 | { | |
495 | /* lower two words are DMA tags */ | |
496 | fqw->word_class[0] = fqw->word_class[1] = wc_dma; | |
497 | } | |
498 | ||
fba9bfed FCE |
499 | /* set FQC to "1" as FIFO is now not empty */ |
500 | PKE_REG_MASK_SET(me, STAT, FQC, 1); | |
aea481da DE |
501 | |
502 | /* okay */ | |
503 | return nr_bytes; | |
504 | } | |
505 | ||
506 | /* NOTREACHED */ | |
fba9bfed | 507 | return 0; |
aea481da DE |
508 | } |
509 | ||
510 | ||
511 | ||
9614fb3c FCE |
512 | /* Reset the PKE */ |
513 | void | |
514 | pke_reset(struct pke_device* me) | |
515 | { | |
516 | /* advance PC over last quadword in FIFO; keep previous FIFO history */ | |
517 | me->fifo_pc = pke_fifo_flush(& me->fifo); | |
518 | me->qw_pc = 0; | |
519 | /* clear registers, flag, other state */ | |
520 | memset(me->regs, 0, sizeof(me->regs)); | |
521 | me->fifo_qw_done = 0; | |
522 | me->flags = 0; | |
523 | } | |
524 | ||
525 | ||
526 | ||
fba9bfed | 527 | /* Issue & swallow next PKE opcode if possible/available */ |
aea481da DE |
528 | |
529 | void | |
e2306992 | 530 | pke_issue(SIM_DESC sd, struct pke_device* me) |
aea481da | 531 | { |
fba9bfed FCE |
532 | struct fifo_quadword* fqw; |
533 | unsigned_4 fw; | |
10572b6a | 534 | unsigned_4 cmd, intr; |
fba9bfed | 535 | |
fd909089 FCE |
536 | /* 1 -- fetch PKE instruction */ |
537 | ||
538 | /* confirm availability of new quadword of PKE instructions */ | |
9614fb3c FCE |
539 | fqw = pke_fifo_access(& me->fifo, me->fifo_pc); |
540 | if(fqw == NULL) | |
fd909089 FCE |
541 | return; |
542 | ||
543 | /* skip over DMA tag, if present */ | |
544 | pke_pc_advance(me, 0); | |
9614fb3c FCE |
545 | /* note: this can only change qw_pc from 0 to 2 and will not |
546 | invalidate fqw */ | |
fd909089 FCE |
547 | |
548 | /* "fetch" instruction quadword and word */ | |
fd909089 FCE |
549 | fw = fqw->data[me->qw_pc]; |
550 | ||
551 | /* store word in PKECODE register */ | |
552 | me->regs[PKE_REG_CODE][0] = fw; | |
553 | ||
554 | ||
555 | /* 2 -- test go / no-go for PKE execution */ | |
fba9bfed | 556 | |
db6dac32 FCE |
557 | /* switch on STAT:PSS if PSS-pending and in idle state */ |
558 | if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) && | |
559 | (me->flags & PKE_FLAG_PENDING_PSS) != 0) | |
560 | { | |
561 | me->flags &= ~PKE_FLAG_PENDING_PSS; | |
562 | PKE_REG_MASK_SET(me, STAT, PSS, 1); | |
563 | } | |
564 | ||
fba9bfed | 565 | /* check for stall/halt control bits */ |
db6dac32 FCE |
566 | if(PKE_REG_MASK_GET(me, STAT, PFS) || |
567 | PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */ | |
9614fb3c FCE |
568 | /* PEW bit not a reason to keep stalling - it's just an indication, re-computed below */ |
569 | /* PGW bit not a reason to keep stalling - it's just an indication, re-computed below */ | |
570 | /* ER0/ER1 not a reason to keep stalling - it's just an indication */ | |
fd909089 | 571 | PKE_REG_MASK_GET(me, STAT, PIS)) |
fba9bfed | 572 | { |
fd909089 FCE |
573 | /* (still) stalled */ |
574 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
575 | /* try again next cycle */ | |
43a6998b | 576 | return; |
fba9bfed | 577 | } |
fba9bfed | 578 | |
fba9bfed FCE |
579 | |
580 | /* 3 -- decode PKE instruction */ | |
581 | ||
fd909089 FCE |
582 | /* decoding */ |
583 | if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) | |
584 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); | |
585 | ||
586 | /* Extract relevant bits from PKEcode */ | |
fba9bfed FCE |
587 | intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E); |
588 | cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); | |
fba9bfed | 589 | |
d22ea5d0 | 590 | /* handle interrupts */ |
fba9bfed FCE |
591 | if(intr) |
592 | { | |
fd909089 | 593 | /* are we resuming an interrupt-stalled instruction? */ |
d22ea5d0 FCE |
594 | if(me->flags & PKE_FLAG_INT_NOLOOP) |
595 | { | |
596 | /* clear loop-prevention flag */ | |
597 | me->flags &= ~PKE_FLAG_INT_NOLOOP; | |
fd909089 FCE |
598 | |
599 | /* fall through to decode & execute */ | |
600 | /* The pke_code_* functions should not check the MSB in the | |
601 | pkecode. */ | |
d22ea5d0 FCE |
602 | } |
603 | else /* new interrupt-flagged instruction */ | |
604 | { | |
605 | /* set INT flag in STAT register */ | |
606 | PKE_REG_MASK_SET(me, STAT, INT, 1); | |
607 | /* set loop-prevention flag */ | |
608 | me->flags |= PKE_FLAG_INT_NOLOOP; | |
609 | ||
fd909089 FCE |
610 | /* set PIS if stall not masked */ |
611 | if(!PKE_REG_MASK_GET(me, ERR, MII)) | |
9614fb3c | 612 | pke_begin_interrupt_stall(me); |
fd909089 FCE |
613 | |
614 | /* suspend this instruction unless it's PKEMARK */ | |
615 | if(!IS_PKE_CMD(cmd, PKEMARK)) | |
616 | { | |
617 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
618 | return; | |
619 | } | |
620 | else | |
621 | { | |
622 | ; /* fall through to decode & execute */ | |
623 | } | |
d22ea5d0 | 624 | } |
fba9bfed FCE |
625 | } |
626 | ||
5068e793 | 627 | |
43a6998b | 628 | /* decode & execute */ |
fd909089 | 629 | if(IS_PKE_CMD(cmd, PKENOP)) |
43a6998b | 630 | pke_code_nop(me, fw); |
fd909089 | 631 | else if(IS_PKE_CMD(cmd, STCYCL)) |
43a6998b | 632 | pke_code_stcycl(me, fw); |
fd909089 | 633 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET)) |
43a6998b | 634 | pke_code_offset(me, fw); |
fd909089 | 635 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE)) |
43a6998b | 636 | pke_code_base(me, fw); |
fd909089 | 637 | else if(IS_PKE_CMD(cmd, ITOP)) |
43a6998b | 638 | pke_code_itop(me, fw); |
fd909089 | 639 | else if(IS_PKE_CMD(cmd, STMOD)) |
43a6998b | 640 | pke_code_stmod(me, fw); |
fd909089 | 641 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) |
43a6998b | 642 | pke_code_mskpath3(me, fw); |
fba9bfed | 643 | else if(IS_PKE_CMD(cmd, PKEMARK)) |
43a6998b | 644 | pke_code_pkemark(me, fw); |
fd909089 | 645 | else if(IS_PKE_CMD(cmd, FLUSHE)) |
43a6998b | 646 | pke_code_flushe(me, fw); |
fd909089 | 647 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH)) |
43a6998b | 648 | pke_code_flush(me, fw); |
fd909089 | 649 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) |
43a6998b | 650 | pke_code_flusha(me, fw); |
fd909089 | 651 | else if(IS_PKE_CMD(cmd, PKEMSCAL)) |
43a6998b | 652 | pke_code_pkemscal(me, fw); |
fd909089 | 653 | else if(IS_PKE_CMD(cmd, PKEMSCNT)) |
43a6998b | 654 | pke_code_pkemscnt(me, fw); |
fd909089 | 655 | else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) |
43a6998b | 656 | pke_code_pkemscalf(me, fw); |
fd909089 | 657 | else if(IS_PKE_CMD(cmd, STMASK)) |
43a6998b | 658 | pke_code_stmask(me, fw); |
fd909089 | 659 | else if(IS_PKE_CMD(cmd, STROW)) |
43a6998b | 660 | pke_code_strow(me, fw); |
fd909089 | 661 | else if(IS_PKE_CMD(cmd, STCOL)) |
43a6998b | 662 | pke_code_stcol(me, fw); |
fd909089 | 663 | else if(IS_PKE_CMD(cmd, MPG)) |
43a6998b | 664 | pke_code_mpg(me, fw); |
fd909089 | 665 | else if(IS_PKE_CMD(cmd, DIRECT)) |
43a6998b | 666 | pke_code_direct(me, fw); |
fd909089 | 667 | else if(IS_PKE_CMD(cmd, DIRECTHL)) |
43a6998b | 668 | pke_code_directhl(me, fw); |
fd909089 | 669 | else if(IS_PKE_CMD(cmd, UNPACK)) |
43a6998b | 670 | pke_code_unpack(me, fw); |
db6dac32 | 671 | /* ... no other commands ... */ |
43a6998b FCE |
672 | else |
673 | pke_code_error(me, fw); | |
674 | } | |
675 | ||
676 | ||
677 | ||
9614fb3c FCE |
678 | /* Clear out contents of FIFO; act as if it was empty. Return PC |
679 | pointing to one-past-last word. */ | |
680 | ||
681 | unsigned_4 | |
682 | pke_fifo_flush(struct pke_fifo* fifo) | |
683 | { | |
684 | /* don't modify any state! */ | |
685 | return fifo->origin + fifo->next; | |
686 | } | |
687 | ||
688 | ||
689 | ||
121d6745 FCE |
690 | /* Clear out contents of FIFO; make it really empty. */ |
691 | ||
692 | void | |
693 | pke_fifo_reset(struct pke_fifo* fifo) | |
694 | { | |
695 | int i; | |
696 | ||
697 | /* clear fifo quadwords */ | |
698 | for(i=0; i<fifo->next; i++) | |
699 | { | |
700 | zfree(fifo->quadwords[i]); | |
701 | fifo->quadwords[i] = NULL; | |
702 | } | |
703 | ||
704 | /* reset pointers */ | |
705 | fifo->origin = 0; | |
706 | fifo->next = 0; | |
707 | } | |
708 | ||
709 | ||
710 | ||
9614fb3c FCE |
711 | /* Make space for the next quadword in the FIFO. Allocate/enlarge |
712 | FIFO pointer block if necessary. Return a pointer to it. */ | |
713 | ||
714 | struct fifo_quadword* | |
715 | pke_fifo_fit(struct pke_fifo* fifo) | |
716 | { | |
717 | struct fifo_quadword* fqw; | |
718 | ||
719 | /* out of space on quadword pointer array? */ | |
720 | if(fifo->next == fifo->length) /* also triggered before fifo->quadwords allocated */ | |
721 | { | |
722 | struct fifo_quadword** new_qw; | |
723 | unsigned_4 new_length = fifo->length + PKE_FIFO_GROW_SIZE; | |
724 | ||
725 | /* allocate new pointer block */ | |
726 | new_qw = zalloc(new_length * sizeof(struct fifo_quadword*)); | |
727 | ASSERT(new_qw != NULL); | |
728 | ||
0b9843e5 FCE |
729 | /* copy over old contents, if any */ |
730 | if(fifo->quadwords != NULL) | |
731 | { | |
732 | /* copy over old pointers to beginning of new block */ | |
733 | memcpy(new_qw, fifo->quadwords, | |
734 | fifo->length * sizeof(struct fifo_quadword*)); | |
735 | ||
736 | /* free old block */ | |
737 | zfree(fifo->quadwords); | |
738 | } | |
9614fb3c FCE |
739 | |
740 | /* replace pointers & counts */ | |
741 | fifo->quadwords = new_qw; | |
742 | fifo->length = new_length; | |
743 | } | |
744 | ||
745 | /* sanity check */ | |
746 | ASSERT(fifo->quadwords != NULL); | |
747 | ||
748 | /* allocate new quadword from heap */ | |
749 | fqw = zalloc(sizeof(struct fifo_quadword)); | |
750 | ASSERT(fqw != NULL); | |
751 | ||
752 | /* push quadword onto fifo */ | |
753 | fifo->quadwords[fifo->next] = fqw; | |
754 | fifo->next++; | |
755 | return fqw; | |
756 | } | |
757 | ||
758 | ||
759 | ||
760 | /* Return a pointer to the FIFO quadword with given absolute index, or | |
761 | NULL if it is out of range */ | |
762 | ||
763 | struct fifo_quadword* | |
764 | pke_fifo_access(struct pke_fifo* fifo, unsigned_4 qwnum) | |
765 | { | |
766 | struct fifo_quadword* fqw; | |
767 | ||
768 | if((qwnum < fifo->origin) || /* before history */ | |
769 | (qwnum >= fifo->origin + fifo->next)) /* after last available quadword */ | |
770 | fqw = NULL; | |
771 | else | |
772 | { | |
773 | ASSERT(fifo->quadwords != NULL); /* must be allocated already */ | |
774 | fqw = fifo->quadwords[qwnum - fifo->origin]; /* pull out pointer from array */ | |
775 | ASSERT(fqw != NULL); /* must be allocated already */ | |
776 | } | |
777 | ||
778 | return fqw; | |
779 | } | |
780 | ||
781 | ||
782 | /* Authorize release of any FIFO entries older than given absolute quadword. */ | |
783 | void | |
784 | pke_fifo_old(struct pke_fifo* fifo, unsigned_4 qwnum) | |
785 | { | |
786 | /* do we have any too-old FIFO elements? */ | |
787 | if(fifo->origin + PKE_FIFO_ARCHEOLOGY < qwnum) | |
788 | { | |
789 | /* count quadwords to forget */ | |
790 | int horizon = qwnum - (fifo->origin + PKE_FIFO_ARCHEOLOGY); | |
791 | int i; | |
792 | ||
793 | /* free quadwords at indices below horizon */ | |
794 | for(i=0; i < horizon; i++) | |
795 | zfree(fifo->quadwords[i]); | |
796 | ||
797 | /* move surviving quadword pointers down to beginning of array */ | |
798 | for(i=horizon; i < fifo->next; i++) | |
799 | fifo->quadwords[i-horizon] = fifo->quadwords[i]; | |
800 | ||
801 | /* clear duplicate pointers */ | |
802 | for(i=fifo->next - horizon; i < fifo->next; i++) | |
803 | fifo->quadwords[i] = NULL; | |
804 | ||
805 | /* adjust FIFO pointers */ | |
806 | fifo->origin = fifo->origin + horizon; | |
807 | fifo->next = fifo->next - horizon; | |
808 | } | |
809 | } | |
810 | ||
811 | ||
812 | ||
813 | ||
43a6998b | 814 | /* advance the PC by given number of data words; update STAT/FQC |
e2306992 FCE |
815 | field; assume FIFO is filled enough; classify passed-over words; |
816 | write FIFO trace line */ | |
43a6998b FCE |
817 | |
818 | void | |
819 | pke_pc_advance(struct pke_device* me, int num_words) | |
820 | { | |
821 | int num = num_words; | |
e2306992 | 822 | struct fifo_quadword* fq = NULL; |
9614fb3c FCE |
823 | unsigned_4 old_fifo_pc = me->fifo_pc; |
824 | ||
e2306992 | 825 | ASSERT(num_words >= 0); |
43a6998b | 826 | |
b4d2f483 FCE |
827 | /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */ |
828 | ||
829 | while(1) | |
fba9bfed | 830 | { |
9614fb3c FCE |
831 | /* find next quadword, if any */ |
832 | fq = pke_fifo_access(& me->fifo, me->fifo_pc); | |
43a6998b FCE |
833 | |
834 | /* skip over DMA tag words if present in word 0 or 1 */ | |
9614fb3c | 835 | if(fq != NULL && fq->word_class[me->qw_pc] == wc_dma) |
fba9bfed | 836 | { |
43a6998b FCE |
837 | /* skip by going around loop an extra time */ |
838 | num ++; | |
fba9bfed | 839 | } |
b4d2f483 FCE |
840 | |
841 | /* nothing left to skip / no DMA tag here */ | |
842 | if(num == 0) | |
843 | break; | |
e2306992 | 844 | |
9614fb3c FCE |
845 | /* we are supposed to skip existing words */ |
846 | ASSERT(fq != NULL); | |
847 | ||
b4d2f483 FCE |
848 | /* one word skipped */ |
849 | num --; | |
850 | ||
851 | /* point to next word */ | |
852 | me->qw_pc ++; | |
853 | if(me->qw_pc == 4) | |
e2306992 | 854 | { |
b4d2f483 FCE |
855 | me->qw_pc = 0; |
856 | me->fifo_pc ++; | |
e2306992 | 857 | |
b4d2f483 FCE |
858 | /* trace the consumption of the FIFO quadword we just skipped over */ |
859 | /* fq still points to it */ | |
860 | if(me->fifo_trace_file != NULL) | |
e2306992 | 861 | { |
b4d2f483 FCE |
862 | /* assert complete classification */ |
863 | ASSERT(fq->word_class[3] != wc_unknown); | |
864 | ASSERT(fq->word_class[2] != wc_unknown); | |
865 | ASSERT(fq->word_class[1] != wc_unknown); | |
866 | ASSERT(fq->word_class[0] != wc_unknown); | |
e2306992 | 867 | |
b4d2f483 FCE |
868 | /* print trace record */ |
869 | fprintf(me->fifo_trace_file, | |
870 | "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n", | |
871 | (me->pke_number == 0 ? 0 : 1), | |
872 | (unsigned) fq->data[3], (unsigned) fq->data[2], | |
873 | (unsigned) fq->data[1], (unsigned) fq->data[0], | |
874 | (unsigned) fq->source_address, | |
875 | fq->word_class[3], fq->word_class[2], | |
876 | fq->word_class[1], fq->word_class[0]); | |
877 | } | |
b4d2f483 FCE |
878 | } /* next quadword */ |
879 | } | |
43a6998b | 880 | |
9614fb3c FCE |
881 | /* age old entries before PC */ |
882 | if(me->fifo_pc != old_fifo_pc) | |
883 | { | |
884 | /* we advanced the fifo-pc; authorize disposal of anything | |
885 | before previous PKEcode */ | |
886 | pke_fifo_old(& me->fifo, old_fifo_pc); | |
887 | } | |
888 | ||
43a6998b | 889 | /* clear FQC if FIFO is now empty */ |
9614fb3c FCE |
890 | fq = pke_fifo_access(& me->fifo, me->fifo_pc); |
891 | if(fq == NULL) | |
fba9bfed | 892 | { |
43a6998b | 893 | PKE_REG_MASK_SET(me, STAT, FQC, 0); |
fba9bfed | 894 | } |
e2306992 FCE |
895 | else /* annote the word where the PC lands as an PKEcode */ |
896 | { | |
9614fb3c | 897 | ASSERT(fq->word_class[me->qw_pc] == wc_pkecode || fq->word_class[me->qw_pc] == wc_unknown); |
e2306992 FCE |
898 | fq->word_class[me->qw_pc] = wc_pkecode; |
899 | } | |
43a6998b | 900 | } |
fba9bfed | 901 | |
fba9bfed | 902 | |
fba9bfed | 903 | |
9614fb3c FCE |
904 | |
905 | ||
43a6998b FCE |
906 | /* Return pointer to FIFO quadword containing given operand# in FIFO. |
907 | `operand_num' starts at 1. Return pointer to operand word in last | |
908 | argument, if non-NULL. If FIFO is not full enough, return 0. | |
909 | Signal an ER0 indication upon skipping a DMA tag. */ | |
fba9bfed | 910 | |
43a6998b | 911 | struct fifo_quadword* |
9614fb3c | 912 | pke_pcrel_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) |
43a6998b | 913 | { |
121d6745 | 914 | int num; |
43a6998b | 915 | int new_qw_pc, new_fifo_pc; |
e2306992 | 916 | struct fifo_quadword* fq = NULL; |
fba9bfed | 917 | |
121d6745 FCE |
918 | /* check for validity of last search results in cache */ |
919 | if(me->last_fifo_pc == me->fifo_pc && | |
920 | me->last_qw_pc == me->qw_pc && | |
921 | operand_num > me->last_num) | |
922 | { | |
923 | /* continue search from last stop */ | |
924 | new_fifo_pc = me->last_new_fifo_pc; | |
925 | new_qw_pc = me->last_new_qw_pc; | |
926 | num = operand_num - me->last_num; | |
927 | } | |
928 | else | |
929 | { | |
930 | /* start search from scratch */ | |
931 | new_fifo_pc = me->fifo_pc; | |
932 | new_qw_pc = me->qw_pc; | |
933 | num = operand_num; | |
934 | } | |
fba9bfed | 935 | |
121d6745 | 936 | ASSERT(num > 0); |
fba9bfed | 937 | |
9614fb3c | 938 | /* printf("pke %d pcrel_fifo operand_num %d\n", me->pke_number, operand_num); */ |
b4d2f483 | 939 | |
e2306992 | 940 | do |
43a6998b FCE |
941 | { |
942 | /* one word skipped */ | |
943 | num --; | |
fba9bfed | 944 | |
43a6998b FCE |
945 | /* point to next word */ |
946 | new_qw_pc ++; | |
947 | if(new_qw_pc == 4) | |
fba9bfed | 948 | { |
43a6998b FCE |
949 | new_qw_pc = 0; |
950 | new_fifo_pc ++; | |
fba9bfed | 951 | } |
fba9bfed | 952 | |
9614fb3c FCE |
953 | fq = pke_fifo_access(& me->fifo, new_fifo_pc); |
954 | ||
43a6998b | 955 | /* check for FIFO underflow */ |
9614fb3c FCE |
956 | if(fq == NULL) |
957 | break; | |
43a6998b FCE |
958 | |
959 | /* skip over DMA tag words if present in word 0 or 1 */ | |
e2306992 | 960 | if(fq->word_class[new_qw_pc] == wc_dma) |
fba9bfed | 961 | { |
9614fb3c FCE |
962 | /* set ER0 */ |
963 | PKE_REG_MASK_SET(me, STAT, ER0, 1); | |
964 | ||
43a6998b | 965 | /* mismatch error! */ |
fd909089 FCE |
966 | if(! PKE_REG_MASK_GET(me, ERR, ME0)) |
967 | { | |
9614fb3c | 968 | pke_begin_interrupt_stall(me); |
fd909089 FCE |
969 | /* don't stall just yet -- finish this instruction */ |
970 | /* the PPS_STALL state will be entered by pke_issue() next time */ | |
971 | } | |
43a6998b FCE |
972 | /* skip by going around loop an extra time */ |
973 | num ++; | |
fba9bfed FCE |
974 | } |
975 | } | |
e2306992 | 976 | while(num > 0); |
fba9bfed | 977 | |
43a6998b | 978 | /* return pointer to operand word itself */ |
e2306992 FCE |
979 | if(fq != NULL) |
980 | { | |
981 | *operand = & fq->data[new_qw_pc]; | |
fba9bfed | 982 | |
9614fb3c FCE |
983 | /* annote the word where the pseudo-PC lands as an PKE operand */ |
984 | ASSERT(fq->word_class[new_qw_pc] == wc_pkedata || fq->word_class[new_qw_pc] == wc_unknown); | |
e2306992 | 985 | fq->word_class[new_qw_pc] = wc_pkedata; |
121d6745 FCE |
986 | |
987 | /* store search results in cache */ | |
988 | /* keys */ | |
989 | me->last_fifo_pc = me->fifo_pc; | |
990 | me->last_qw_pc = me->qw_pc; | |
991 | /* values */ | |
992 | me->last_num = operand_num; | |
993 | me->last_new_fifo_pc = new_fifo_pc; | |
994 | me->last_new_qw_pc = new_qw_pc; | |
e2306992 FCE |
995 | } |
996 | ||
997 | return fq; | |
43a6998b | 998 | } |
fba9bfed | 999 | |
fba9bfed | 1000 | |
43a6998b FCE |
1001 | /* Return pointer to given operand# in FIFO. `operand_num' starts at 1. |
1002 | If FIFO is not full enough, return 0. Skip over DMA tags, but mark | |
1003 | them as an error (ER0). */ | |
fba9bfed | 1004 | |
43a6998b | 1005 | unsigned_4* |
9614fb3c | 1006 | pke_pcrel_operand(struct pke_device* me, int operand_num) |
43a6998b FCE |
1007 | { |
1008 | unsigned_4* operand = NULL; | |
1009 | struct fifo_quadword* fifo_operand; | |
fba9bfed | 1010 | |
9614fb3c | 1011 | fifo_operand = pke_pcrel_fifo(me, operand_num, & operand); |
fba9bfed | 1012 | |
43a6998b | 1013 | if(fifo_operand == NULL) |
9614fb3c | 1014 | ASSERT(operand == NULL); /* pke_pcrel_fifo() ought leave it untouched */ |
fba9bfed | 1015 | |
43a6998b FCE |
1016 | return operand; |
1017 | } | |
fba9bfed | 1018 | |
fba9bfed | 1019 | |
db6dac32 FCE |
1020 | /* Return a bit-field extract of given operand# in FIFO, and its |
1021 | source-addr. `bit_offset' starts at 0, referring to LSB after PKE | |
1022 | instruction word. Width must be >0, <=32. Assume FIFO is full | |
1023 | enough. Skip over DMA tags, but mark them as an error (ER0). */ | |
1024 | ||
1025 | unsigned_4 | |
9614fb3c | 1026 | pke_pcrel_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr) |
db6dac32 FCE |
1027 | { |
1028 | unsigned_4* word = NULL; | |
1029 | unsigned_4 value; | |
1030 | struct fifo_quadword* fifo_operand; | |
534a3d5c FCE |
1031 | int wordnumber, bitnumber; |
1032 | ||
1033 | wordnumber = bit_offset/32; | |
1034 | bitnumber = bit_offset%32; | |
db6dac32 FCE |
1035 | |
1036 | /* find operand word with bitfield */ | |
9614fb3c | 1037 | fifo_operand = pke_pcrel_fifo(me, wordnumber + 1, &word); |
534a3d5c | 1038 | ASSERT(word != NULL); |
db6dac32 FCE |
1039 | |
1040 | /* extract bitfield from word */ | |
534a3d5c | 1041 | value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1); |
db6dac32 FCE |
1042 | |
1043 | /* extract source addr from fifo word */ | |
1044 | *source_addr = fifo_operand->source_address; | |
1045 | ||
1046 | return value; | |
1047 | } | |
1048 | ||
fba9bfed | 1049 | |
fba9bfed | 1050 | |
d22ea5d0 FCE |
1051 | /* check for stall conditions on indicated devices (path* only on |
1052 | PKE1), do not change status; return 0 iff no stall */ | |
43a6998b FCE |
1053 | int |
1054 | pke_check_stall(struct pke_device* me, enum pke_check_target what) | |
1055 | { | |
1056 | int any_stall = 0; | |
e2306992 | 1057 | unsigned_4 cop2_stat, gpuif_stat; |
43a6998b | 1058 | |
e2306992 | 1059 | /* read status words */ |
534a3d5c FCE |
1060 | ASSERT(sizeof(unsigned_4) == 4); |
1061 | PKE_MEM_READ(me, (GIF_REG_STAT), | |
1062 | & gpuif_stat, | |
1063 | 4); | |
1064 | PKE_MEM_READ(me, (COP2_REG_STAT_ADDR), | |
1065 | & cop2_stat, | |
1066 | 4); | |
e2306992 | 1067 | |
43a6998b FCE |
1068 | /* perform checks */ |
1069 | if(what == chk_vu) | |
1070 | { | |
e2306992 FCE |
1071 | if(me->pke_number == 0) |
1072 | any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E); | |
1073 | else /* if(me->pke_number == 1) */ | |
1074 | any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E); | |
db6dac32 FCE |
1075 | } |
1076 | else if(what == chk_path1) /* VU -> GPUIF */ | |
1077 | { | |
1078 | if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1) | |
43a6998b | 1079 | any_stall = 1; |
fba9bfed | 1080 | } |
db6dac32 | 1081 | else if(what == chk_path2) /* PKE -> GPUIF */ |
fba9bfed | 1082 | { |
db6dac32 FCE |
1083 | if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2) |
1084 | any_stall = 1; | |
1085 | } | |
1086 | else if(what == chk_path3) /* DMA -> GPUIF */ | |
1087 | { | |
1088 | if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3) | |
43a6998b FCE |
1089 | any_stall = 1; |
1090 | } | |
1091 | else | |
1092 | { | |
db6dac32 FCE |
1093 | /* invalid what */ |
1094 | ASSERT(0); | |
43a6998b | 1095 | } |
fba9bfed | 1096 | |
43a6998b FCE |
1097 | /* any stall reasons? */ |
1098 | return any_stall; | |
1099 | } | |
fba9bfed | 1100 | |
fba9bfed | 1101 | |
d22ea5d0 | 1102 | /* PKE1 only: flip the DBF bit; recompute TOPS, TOP */ |
43a6998b FCE |
1103 | void |
1104 | pke_flip_dbf(struct pke_device* me) | |
1105 | { | |
370e0ef7 | 1106 | int newdf; |
d22ea5d0 | 1107 | /* compute new TOP */ |
733cfc78 IC |
1108 | PKE_REG_MASK_SET(me, TOP, TOP, |
1109 | PKE_REG_MASK_GET(me, TOPS, TOPS)); | |
43a6998b | 1110 | /* flip DBF */ |
370e0ef7 FCE |
1111 | newdf = PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1; |
1112 | PKE_REG_MASK_SET(me, DBF, DF, newdf); | |
1113 | PKE_REG_MASK_SET(me, STAT, DBF, newdf); | |
43a6998b FCE |
1114 | /* compute new TOPS */ |
1115 | PKE_REG_MASK_SET(me, TOPS, TOPS, | |
1116 | (PKE_REG_MASK_GET(me, BASE, BASE) + | |
370e0ef7 FCE |
1117 | newdf * PKE_REG_MASK_GET(me, OFST, OFFSET))); |
1118 | ||
d22ea5d0 FCE |
1119 | /* this is equivalent to last word from okadaa (98-02-25): |
1120 | 1) TOP=TOPS; | |
1121 | 2) TOPS=BASE + !DBF*OFFSET | |
1122 | 3) DBF=!DBF */ | |
43a6998b | 1123 | } |
fba9bfed | 1124 | |
fba9bfed | 1125 | |
9614fb3c FCE |
1126 | /* set the STAT:PIS bit and send an interrupt to the 5900 */ |
1127 | void | |
1128 | pke_begin_interrupt_stall(struct pke_device* me) | |
1129 | { | |
1130 | /* set PIS */ | |
1131 | PKE_REG_MASK_SET(me, STAT, PIS, 1); | |
1132 | ||
1133 | /* XXX: send interrupt to 5900? */ | |
1134 | } | |
1135 | ||
1136 | ||
1137 | ||
fba9bfed | 1138 | |
43a6998b FCE |
1139 | /* PKEcode handler functions -- responsible for checking and |
1140 | confirming old stall conditions, executing pkecode, updating PC and | |
1141 | status registers -- may assume being run on correct PKE unit */ | |
1142 | ||
1143 | void | |
1144 | pke_code_nop(struct pke_device* me, unsigned_4 pkecode) | |
1145 | { | |
1146 | /* done */ | |
1147 | pke_pc_advance(me, 1); | |
1148 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1149 | } | |
fba9bfed | 1150 | |
fba9bfed | 1151 | |
43a6998b FCE |
1152 | void |
1153 | pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode) | |
1154 | { | |
1155 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1156 | |
43a6998b | 1157 | /* copy immediate value into CYCLE reg */ |
534a3d5c FCE |
1158 | PKE_REG_MASK_SET(me, CYCLE, WL, BIT_MASK_GET(imm, 8, 15)); |
1159 | PKE_REG_MASK_SET(me, CYCLE, CL, BIT_MASK_GET(imm, 0, 7)); | |
43a6998b FCE |
1160 | /* done */ |
1161 | pke_pc_advance(me, 1); | |
1162 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1163 | } | |
fba9bfed | 1164 | |
fba9bfed | 1165 | |
43a6998b FCE |
1166 | void |
1167 | pke_code_offset(struct pke_device* me, unsigned_4 pkecode) | |
1168 | { | |
1169 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1170 | |
43a6998b FCE |
1171 | /* copy 10 bits to OFFSET field */ |
1172 | PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); | |
1173 | /* clear DBF bit */ | |
1174 | PKE_REG_MASK_SET(me, DBF, DF, 0); | |
1175 | /* clear other DBF bit */ | |
1176 | PKE_REG_MASK_SET(me, STAT, DBF, 0); | |
1177 | /* set TOPS = BASE */ | |
1178 | PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE)); | |
1179 | /* done */ | |
1180 | pke_pc_advance(me, 1); | |
1181 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1182 | } | |
fba9bfed | 1183 | |
fba9bfed | 1184 | |
43a6998b FCE |
1185 | void |
1186 | pke_code_base(struct pke_device* me, unsigned_4 pkecode) | |
1187 | { | |
1188 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1189 | |
43a6998b FCE |
1190 | /* copy 10 bits to BASE field */ |
1191 | PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); | |
43a6998b FCE |
1192 | /* done */ |
1193 | pke_pc_advance(me, 1); | |
1194 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1195 | } | |
fba9bfed | 1196 | |
fba9bfed | 1197 | |
43a6998b FCE |
1198 | void |
1199 | pke_code_itop(struct pke_device* me, unsigned_4 pkecode) | |
1200 | { | |
1201 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1202 | |
43a6998b FCE |
1203 | /* copy 10 bits to ITOPS field */ |
1204 | PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); | |
1205 | /* done */ | |
1206 | pke_pc_advance(me, 1); | |
1207 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1208 | } | |
fba9bfed | 1209 | |
fba9bfed | 1210 | |
43a6998b FCE |
1211 | void |
1212 | pke_code_stmod(struct pke_device* me, unsigned_4 pkecode) | |
1213 | { | |
1214 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
d22ea5d0 | 1215 | |
43a6998b FCE |
1216 | /* copy 2 bits to MODE register */ |
1217 | PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); | |
1218 | /* done */ | |
1219 | pke_pc_advance(me, 1); | |
1220 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1221 | } | |
fba9bfed | 1222 | |
43a6998b FCE |
1223 | |
1224 | void | |
1225 | pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode) | |
1226 | { | |
b4d2f483 FCE |
1227 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); |
1228 | unsigned_4 gif_mode; | |
1229 | ||
89154e47 | 1230 | /* set appropriate bit */ |
b4d2f483 | 1231 | if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0) |
89154e47 | 1232 | gif_mode = GIF_REG_MODE_M3R_MASK; |
b4d2f483 | 1233 | else |
89154e47 | 1234 | gif_mode = 0; |
b4d2f483 | 1235 | |
89154e47 | 1236 | /* write register; patrickm code will look at M3R bit only */ |
b4d2f483 FCE |
1237 | PKE_MEM_WRITE(me, GIF_REG_MODE, & gif_mode, 4); |
1238 | ||
b4d2f483 FCE |
1239 | /* done */ |
1240 | pke_pc_advance(me, 1); | |
1241 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
43a6998b FCE |
1242 | } |
1243 | ||
1244 | ||
1245 | void | |
1246 | pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode) | |
1247 | { | |
1248 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1249 | /* copy 16 bits to MARK register */ | |
1250 | PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); | |
1251 | /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ | |
1252 | PKE_REG_MASK_SET(me, STAT, MRK, 1); | |
1253 | /* done */ | |
1254 | pke_pc_advance(me, 1); | |
1255 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1256 | } | |
1257 | ||
1258 | ||
1259 | void | |
1260 | pke_code_flushe(struct pke_device* me, unsigned_4 pkecode) | |
1261 | { | |
1262 | /* compute next PEW bit */ | |
1263 | if(pke_check_stall(me, chk_vu)) | |
1264 | { | |
1265 | /* VU busy */ | |
1266 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1267 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1268 | /* try again next cycle */ | |
fba9bfed | 1269 | } |
fba9bfed FCE |
1270 | else |
1271 | { | |
43a6998b FCE |
1272 | /* VU idle */ |
1273 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
1274 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
fba9bfed FCE |
1275 | pke_pc_advance(me, 1); |
1276 | } | |
fba9bfed FCE |
1277 | } |
1278 | ||
1279 | ||
43a6998b FCE |
1280 | void |
1281 | pke_code_flush(struct pke_device* me, unsigned_4 pkecode) | |
1282 | { | |
1283 | int something_busy = 0; | |
1284 | ||
1285 | /* compute next PEW, PGW bits */ | |
1286 | if(pke_check_stall(me, chk_vu)) | |
1287 | { | |
1288 | something_busy = 1; | |
1289 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1290 | } | |
1291 | else | |
1292 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed FCE |
1293 | |
1294 | ||
43a6998b FCE |
1295 | if(pke_check_stall(me, chk_path1) || |
1296 | pke_check_stall(me, chk_path2)) | |
1297 | { | |
1298 | something_busy = 1; | |
1299 | PKE_REG_MASK_SET(me, STAT, PGW, 1); | |
1300 | } | |
1301 | else | |
1302 | PKE_REG_MASK_SET(me, STAT, PGW, 0); | |
fba9bfed | 1303 | |
43a6998b FCE |
1304 | /* go or no go */ |
1305 | if(something_busy) | |
1306 | { | |
1307 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1308 | /* try again next cycle */ | |
1309 | } | |
1310 | else | |
1311 | { | |
1312 | /* all idle */ | |
1313 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1314 | pke_pc_advance(me, 1); | |
1315 | } | |
1316 | } | |
fba9bfed | 1317 | |
fba9bfed FCE |
1318 | |
1319 | void | |
43a6998b | 1320 | pke_code_flusha(struct pke_device* me, unsigned_4 pkecode) |
fba9bfed | 1321 | { |
43a6998b | 1322 | int something_busy = 0; |
fba9bfed | 1323 | |
43a6998b FCE |
1324 | /* compute next PEW, PGW bits */ |
1325 | if(pke_check_stall(me, chk_vu)) | |
fba9bfed | 1326 | { |
43a6998b FCE |
1327 | something_busy = 1; |
1328 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
fba9bfed | 1329 | } |
43a6998b FCE |
1330 | else |
1331 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed | 1332 | |
43a6998b FCE |
1333 | |
1334 | if(pke_check_stall(me, chk_path1) || | |
1335 | pke_check_stall(me, chk_path2) || | |
1336 | pke_check_stall(me, chk_path3)) | |
fba9bfed | 1337 | { |
43a6998b FCE |
1338 | something_busy = 1; |
1339 | PKE_REG_MASK_SET(me, STAT, PGW, 1); | |
fba9bfed | 1340 | } |
43a6998b FCE |
1341 | else |
1342 | PKE_REG_MASK_SET(me, STAT, PGW, 0); | |
fba9bfed | 1343 | |
43a6998b FCE |
1344 | if(something_busy) |
1345 | { | |
1346 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1347 | /* try again next cycle */ | |
1348 | } | |
1349 | else | |
1350 | { | |
1351 | /* all idle */ | |
1352 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1353 | pke_pc_advance(me, 1); | |
1354 | } | |
fba9bfed FCE |
1355 | } |
1356 | ||
1357 | ||
43a6998b FCE |
1358 | void |
1359 | pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode) | |
1360 | { | |
1361 | /* compute next PEW bit */ | |
1362 | if(pke_check_stall(me, chk_vu)) | |
1363 | { | |
1364 | /* VU busy */ | |
1365 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1366 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1367 | /* try again next cycle */ | |
1368 | } | |
1369 | else | |
1370 | { | |
1371 | unsigned_4 vu_pc; | |
1372 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1373 | ||
1374 | /* VU idle */ | |
1375 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
1376 | ||
1377 | /* flip DBF on PKE1 */ | |
1378 | if(me->pke_number == 1) | |
1379 | pke_flip_dbf(me); | |
1380 | ||
f0bb94cd | 1381 | /* compute new PC for VU (host byte-order) */ |
db6dac32 | 1382 | vu_pc = BIT_MASK_GET(imm, 0, 15); |
f0bb94cd | 1383 | vu_pc = T2H_4(vu_pc); |
534a3d5c | 1384 | |
43a6998b | 1385 | /* write new PC; callback function gets VU running */ |
534a3d5c FCE |
1386 | ASSERT(sizeof(unsigned_4) == 4); |
1387 | PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1388 | & vu_pc, | |
1389 | 4); | |
43a6998b | 1390 | |
d22ea5d0 FCE |
1391 | /* copy ITOPS field to ITOP */ |
1392 | PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
1393 | ||
43a6998b FCE |
1394 | /* done */ |
1395 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1396 | pke_pc_advance(me, 1); | |
1397 | } | |
1398 | } | |
fba9bfed | 1399 | |
fba9bfed | 1400 | |
43a6998b FCE |
1401 | |
1402 | void | |
1403 | pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode) | |
fba9bfed | 1404 | { |
43a6998b FCE |
1405 | /* compute next PEW bit */ |
1406 | if(pke_check_stall(me, chk_vu)) | |
1407 | { | |
1408 | /* VU busy */ | |
1409 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
1410 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1411 | /* try again next cycle */ | |
1412 | } | |
1413 | else | |
1414 | { | |
1415 | unsigned_4 vu_pc; | |
fba9bfed | 1416 | |
43a6998b FCE |
1417 | /* VU idle */ |
1418 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed | 1419 | |
43a6998b FCE |
1420 | /* flip DBF on PKE1 */ |
1421 | if(me->pke_number == 1) | |
1422 | pke_flip_dbf(me); | |
fba9bfed | 1423 | |
43a6998b | 1424 | /* read old PC */ |
534a3d5c FCE |
1425 | ASSERT(sizeof(unsigned_4) == 4); |
1426 | PKE_MEM_READ(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1427 | & vu_pc, | |
1428 | 4); | |
43a6998b FCE |
1429 | |
1430 | /* rewrite new PC; callback function gets VU running */ | |
534a3d5c FCE |
1431 | ASSERT(sizeof(unsigned_4) == 4); |
1432 | PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1433 | & vu_pc, | |
1434 | 4); | |
43a6998b | 1435 | |
d22ea5d0 FCE |
1436 | /* copy ITOPS field to ITOP */ |
1437 | PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
1438 | ||
43a6998b FCE |
1439 | /* done */ |
1440 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1441 | pke_pc_advance(me, 1); | |
1442 | } | |
1443 | } | |
1444 | ||
1445 | ||
1446 | void | |
1447 | pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode) | |
1448 | { | |
1449 | int something_busy = 0; | |
1450 | ||
1451 | /* compute next PEW, PGW bits */ | |
1452 | if(pke_check_stall(me, chk_vu)) | |
fba9bfed | 1453 | { |
43a6998b FCE |
1454 | something_busy = 1; |
1455 | PKE_REG_MASK_SET(me, STAT, PEW, 1); | |
fba9bfed | 1456 | } |
43a6998b FCE |
1457 | else |
1458 | PKE_REG_MASK_SET(me, STAT, PEW, 0); | |
fba9bfed | 1459 | |
43a6998b FCE |
1460 | |
1461 | if(pke_check_stall(me, chk_path1) || | |
1462 | pke_check_stall(me, chk_path2) || | |
1463 | pke_check_stall(me, chk_path3)) | |
1464 | { | |
1465 | something_busy = 1; | |
1466 | PKE_REG_MASK_SET(me, STAT, PGW, 1); | |
1467 | } | |
fba9bfed | 1468 | else |
43a6998b | 1469 | PKE_REG_MASK_SET(me, STAT, PGW, 0); |
fba9bfed | 1470 | |
43a6998b FCE |
1471 | /* go or no go */ |
1472 | if(something_busy) | |
1473 | { | |
1474 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1475 | /* try again next cycle */ | |
1476 | } | |
1477 | else | |
1478 | { | |
1479 | unsigned_4 vu_pc; | |
1480 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1481 | ||
1482 | /* flip DBF on PKE1 */ | |
1483 | if(me->pke_number == 1) | |
1484 | pke_flip_dbf(me); | |
1485 | ||
f0bb94cd | 1486 | /* compute new PC for VU (host byte-order) */ |
db6dac32 | 1487 | vu_pc = BIT_MASK_GET(imm, 0, 15); |
f0bb94cd | 1488 | vu_pc = T2H_4(vu_pc); |
534a3d5c FCE |
1489 | |
1490 | /* rewrite new PC; callback function gets VU running */ | |
1491 | ASSERT(sizeof(unsigned_4) == 4); | |
1492 | PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), | |
1493 | & vu_pc, | |
1494 | 4); | |
43a6998b | 1495 | |
d22ea5d0 FCE |
1496 | /* copy ITOPS field to ITOP */ |
1497 | PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); | |
1498 | ||
43a6998b FCE |
1499 | /* done */ |
1500 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1501 | pke_pc_advance(me, 1); | |
1502 | } | |
fba9bfed FCE |
1503 | } |
1504 | ||
1505 | ||
43a6998b FCE |
1506 | void |
1507 | pke_code_stmask(struct pke_device* me, unsigned_4 pkecode) | |
1508 | { | |
43a6998b | 1509 | unsigned_4* mask; |
d22ea5d0 | 1510 | |
d22ea5d0 | 1511 | /* check that FIFO has one more word for STMASK operand */ |
9614fb3c | 1512 | mask = pke_pcrel_operand(me, 1); |
43a6998b FCE |
1513 | if(mask != NULL) |
1514 | { | |
1515 | /* "transferring" operand */ | |
1516 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
db6dac32 FCE |
1517 | |
1518 | /* set NUM */ | |
1519 | PKE_REG_MASK_SET(me, NUM, NUM, 1); | |
1520 | ||
43a6998b FCE |
1521 | /* fill the register */ |
1522 | PKE_REG_MASK_SET(me, MASK, MASK, *mask); | |
db6dac32 FCE |
1523 | |
1524 | /* set NUM */ | |
1525 | PKE_REG_MASK_SET(me, NUM, NUM, 0); | |
1526 | ||
43a6998b FCE |
1527 | /* done */ |
1528 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
e2306992 | 1529 | pke_pc_advance(me, 2); |
43a6998b FCE |
1530 | } |
1531 | else | |
1532 | { | |
1533 | /* need to wait for another word */ | |
1534 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1535 | /* try again next cycle */ | |
1536 | } | |
1537 | } | |
fba9bfed | 1538 | |
fba9bfed | 1539 | |
43a6998b FCE |
1540 | void |
1541 | pke_code_strow(struct pke_device* me, unsigned_4 pkecode) | |
fba9bfed | 1542 | { |
43a6998b FCE |
1543 | /* check that FIFO has four more words for STROW operand */ |
1544 | unsigned_4* last_op; | |
1545 | ||
9614fb3c | 1546 | last_op = pke_pcrel_operand(me, 4); |
43a6998b FCE |
1547 | if(last_op != NULL) |
1548 | { | |
1549 | /* "transferring" operand */ | |
1550 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1551 | ||
db6dac32 FCE |
1552 | /* set NUM */ |
1553 | PKE_REG_MASK_SET(me, NUM, NUM, 1); | |
1554 | ||
43a6998b | 1555 | /* copy ROW registers: must all exist if 4th operand exists */ |
9614fb3c FCE |
1556 | me->regs[PKE_REG_R0][0] = * pke_pcrel_operand(me, 1); |
1557 | me->regs[PKE_REG_R1][0] = * pke_pcrel_operand(me, 2); | |
1558 | me->regs[PKE_REG_R2][0] = * pke_pcrel_operand(me, 3); | |
1559 | me->regs[PKE_REG_R3][0] = * pke_pcrel_operand(me, 4); | |
43a6998b | 1560 | |
db6dac32 FCE |
1561 | /* set NUM */ |
1562 | PKE_REG_MASK_SET(me, NUM, NUM, 0); | |
1563 | ||
43a6998b FCE |
1564 | /* done */ |
1565 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1566 | pke_pc_advance(me, 5); | |
1567 | } | |
1568 | else | |
1569 | { | |
1570 | /* need to wait for another word */ | |
1571 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1572 | /* try again next cycle */ | |
1573 | } | |
1574 | } | |
aea481da | 1575 | |
fba9bfed | 1576 | |
43a6998b FCE |
1577 | void |
1578 | pke_code_stcol(struct pke_device* me, unsigned_4 pkecode) | |
1579 | { | |
1580 | /* check that FIFO has four more words for STCOL operand */ | |
1581 | unsigned_4* last_op; | |
1582 | ||
9614fb3c | 1583 | last_op = pke_pcrel_operand(me, 4); |
43a6998b | 1584 | if(last_op != NULL) |
fba9bfed | 1585 | { |
43a6998b FCE |
1586 | /* "transferring" operand */ |
1587 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1588 | ||
db6dac32 FCE |
1589 | /* set NUM */ |
1590 | PKE_REG_MASK_SET(me, NUM, NUM, 1); | |
1591 | ||
43a6998b | 1592 | /* copy COL registers: must all exist if 4th operand exists */ |
9614fb3c FCE |
1593 | me->regs[PKE_REG_C0][0] = * pke_pcrel_operand(me, 1); |
1594 | me->regs[PKE_REG_C1][0] = * pke_pcrel_operand(me, 2); | |
1595 | me->regs[PKE_REG_C2][0] = * pke_pcrel_operand(me, 3); | |
1596 | me->regs[PKE_REG_C3][0] = * pke_pcrel_operand(me, 4); | |
43a6998b | 1597 | |
db6dac32 FCE |
1598 | /* set NUM */ |
1599 | PKE_REG_MASK_SET(me, NUM, NUM, 0); | |
1600 | ||
43a6998b FCE |
1601 | /* done */ |
1602 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1603 | pke_pc_advance(me, 5); | |
fba9bfed | 1604 | } |
fba9bfed | 1605 | else |
43a6998b FCE |
1606 | { |
1607 | /* need to wait for another word */ | |
1608 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1609 | /* try again next cycle */ | |
1610 | } | |
1611 | } | |
fba9bfed | 1612 | |
43a6998b FCE |
1613 | |
1614 | void | |
1615 | pke_code_mpg(struct pke_device* me, unsigned_4 pkecode) | |
1616 | { | |
1617 | unsigned_4* last_mpg_word; | |
1618 | int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); | |
1619 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1620 | ||
653c2590 FCE |
1621 | /* assert 64-bit alignment of MPG operand */ |
1622 | if(me->qw_pc != 3 && me->qw_pc != 1) | |
1623 | return pke_code_error(me, pkecode); | |
1624 | ||
43a6998b FCE |
1625 | /* map zero to max+1 */ |
1626 | if(num==0) num=0x100; | |
1627 | ||
1628 | /* check that FIFO has a few more words for MPG operand */ | |
9614fb3c | 1629 | last_mpg_word = pke_pcrel_operand(me, num*2); /* num: number of 64-bit words */ |
43a6998b FCE |
1630 | if(last_mpg_word != NULL) |
1631 | { | |
1632 | /* perform implied FLUSHE */ | |
db6dac32 | 1633 | if(pke_check_stall(me, chk_vu)) |
653c2590 FCE |
1634 | { |
1635 | /* VU busy */ | |
1636 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); | |
1637 | /* retry this instruction next clock */ | |
1638 | } | |
1639 | else | |
43a6998b FCE |
1640 | { |
1641 | /* VU idle */ | |
1642 | int i; | |
1643 | ||
1644 | /* "transferring" operand */ | |
1645 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1646 | ||
534a3d5c FCE |
1647 | /* set NUM */ |
1648 | PKE_REG_MASK_SET(me, NUM, NUM, num); | |
1649 | ||
1650 | /* transfer VU instructions, one word-pair per iteration */ | |
1651 | for(i=0; i<num; i++) | |
43a6998b FCE |
1652 | { |
1653 | address_word vu_addr_base, vu_addr; | |
1654 | address_word vutrack_addr_base, vutrack_addr; | |
89154e47 | 1655 | address_word vu_addr_max_size; |
653c2590 | 1656 | unsigned_4 vu_lower_opcode, vu_upper_opcode; |
43a6998b | 1657 | unsigned_4* operand; |
534a3d5c FCE |
1658 | struct fifo_quadword* fq; |
1659 | int next_num; | |
1660 | ||
1661 | /* decrement NUM */ | |
1662 | next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1; | |
1663 | PKE_REG_MASK_SET(me, NUM, NUM, next_num); | |
43a6998b | 1664 | |
db6dac32 | 1665 | /* imm: in 64-bit units for MPG instruction */ |
43a6998b FCE |
1666 | /* VU*_MEM0 : instruction memory */ |
1667 | vu_addr_base = (me->pke_number == 0) ? | |
733cfc78 | 1668 | VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START; |
89154e47 FCE |
1669 | vu_addr_max_size = (me->pke_number == 0) ? |
1670 | VU0_MEM0_SIZE : VU1_MEM0_SIZE; | |
43a6998b FCE |
1671 | vutrack_addr_base = (me->pke_number == 0) ? |
1672 | VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; | |
89154e47 FCE |
1673 | |
1674 | /* compute VU address for this word-pair */ | |
1675 | vu_addr = vu_addr_base + (imm + i) * 8; | |
1676 | /* check for vu_addr overflow */ | |
1677 | while(vu_addr >= vu_addr_base + vu_addr_max_size) | |
1678 | vu_addr -= vu_addr_max_size; | |
1679 | ||
1680 | /* compute VU tracking address */ | |
1681 | vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2; | |
534a3d5c | 1682 | |
653c2590 | 1683 | /* Fetch operand words; assume they are already little-endian for VU imem */ |
9614fb3c | 1684 | fq = pke_pcrel_fifo(me, i*2 + 1, & operand); |
653c2590 | 1685 | vu_lower_opcode = *operand; |
9614fb3c | 1686 | vu_upper_opcode = *pke_pcrel_operand(me, i*2 + 2); |
43a6998b FCE |
1687 | |
1688 | /* write data into VU memory */ | |
f0bb94cd | 1689 | /* lower (scalar) opcode comes in first word ; macro performs H2T! */ |
534a3d5c | 1690 | PKE_MEM_WRITE(me, vu_addr, |
b4d2f483 | 1691 | & vu_lower_opcode, |
653c2590 | 1692 | 4); |
f0bb94cd | 1693 | /* upper (vector) opcode comes in second word ; H2T */ |
b4d2f483 | 1694 | ASSERT(sizeof(unsigned_4) == 4); |
653c2590 | 1695 | PKE_MEM_WRITE(me, vu_addr + 4, |
b4d2f483 | 1696 | & vu_upper_opcode, |
653c2590 | 1697 | 4); |
43a6998b | 1698 | |
653c2590 | 1699 | /* write tracking address in target byte-order */ |
653c2590 | 1700 | ASSERT(sizeof(unsigned_4) == 4); |
534a3d5c | 1701 | PKE_MEM_WRITE(me, vutrack_addr, |
f0bb94cd | 1702 | & fq->source_address, |
534a3d5c | 1703 | 4); |
43a6998b | 1704 | } /* VU xfer loop */ |
db6dac32 FCE |
1705 | |
1706 | /* check NUM */ | |
1707 | ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0); | |
43a6998b FCE |
1708 | |
1709 | /* done */ | |
1710 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1711 | pke_pc_advance(me, 1 + num*2); | |
1712 | } | |
43a6998b FCE |
1713 | } /* if FIFO full enough */ |
1714 | else | |
1715 | { | |
1716 | /* need to wait for another word */ | |
1717 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1718 | /* retry this instruction next clock */ | |
1719 | } | |
aea481da DE |
1720 | } |
1721 | ||
1722 | ||
43a6998b FCE |
1723 | void |
1724 | pke_code_direct(struct pke_device* me, unsigned_4 pkecode) | |
1725 | { | |
1726 | /* check that FIFO has a few more words for DIRECT operand */ | |
1727 | unsigned_4* last_direct_word; | |
1728 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
43a6998b | 1729 | |
653c2590 FCE |
1730 | /* assert 128-bit alignment of DIRECT operand */ |
1731 | if(me->qw_pc != 3) | |
1732 | return pke_code_error(me, pkecode); | |
1733 | ||
43a6998b FCE |
1734 | /* map zero to max+1 */ |
1735 | if(imm==0) imm=0x10000; | |
1736 | ||
9614fb3c | 1737 | last_direct_word = pke_pcrel_operand(me, imm*4); /* imm: number of 128-bit words */ |
43a6998b FCE |
1738 | if(last_direct_word != NULL) |
1739 | { | |
1740 | /* VU idle */ | |
1741 | int i; | |
f0bb94cd | 1742 | unsigned_16 fifo_data; |
43a6998b FCE |
1743 | |
1744 | /* "transferring" operand */ | |
1745 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1746 | ||
1747 | /* transfer GPUIF quadwords, one word per iteration */ | |
1748 | for(i=0; i<imm*4; i++) | |
1749 | { | |
9614fb3c | 1750 | unsigned_4* operand = pke_pcrel_operand(me, 1+i); |
43a6998b FCE |
1751 | |
1752 | /* collect word into quadword */ | |
f0bb94cd | 1753 | *A4_16(&fifo_data, 3 - (i % 4)) = *operand; |
0b9843e5 | 1754 | |
534a3d5c FCE |
1755 | /* write to GPUIF FIFO only with full quadword */ |
1756 | if(i % 4 == 3) | |
43a6998b | 1757 | { |
534a3d5c FCE |
1758 | ASSERT(sizeof(fifo_data) == 16); |
1759 | PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR, | |
f0bb94cd | 1760 | & fifo_data, |
534a3d5c | 1761 | 16); |
43a6998b | 1762 | } /* write collected quadword */ |
43a6998b FCE |
1763 | } /* GPUIF xfer loop */ |
1764 | ||
1765 | /* done */ | |
1766 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
1767 | pke_pc_advance(me, 1 + imm*4); | |
1768 | } /* if FIFO full enough */ | |
1769 | else | |
1770 | { | |
1771 | /* need to wait for another word */ | |
1772 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
1773 | /* retry this instruction next clock */ | |
1774 | } | |
1775 | } | |
fba9bfed | 1776 | |
43a6998b FCE |
1777 | |
1778 | void | |
1779 | pke_code_directhl(struct pke_device* me, unsigned_4 pkecode) | |
fba9bfed | 1780 | { |
43a6998b FCE |
1781 | /* treat the same as DIRECTH */ |
1782 | pke_code_direct(me, pkecode); | |
1783 | } | |
fba9bfed | 1784 | |
43a6998b FCE |
1785 | |
1786 | void | |
1787 | pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) | |
1788 | { | |
1789 | int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); | |
1790 | int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); | |
1791 | int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); | |
9614fb3c | 1792 | int nummx = (num == 0) ? 0x0100 : num; |
db6dac32 | 1793 | short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */ |
43a6998b | 1794 | short vl = BIT_MASK_GET(cmd, 0, 1); |
43a6998b | 1795 | int m = BIT_MASK_GET(cmd, 4, 4); |
db6dac32 | 1796 | short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */ |
43a6998b | 1797 | short wl = PKE_REG_MASK_GET(me, CYCLE, WL); |
9614fb3c | 1798 | short addrwl = (wl == 0) ? 0x0100 : wl; |
db6dac32 | 1799 | int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */ |
653c2590 | 1800 | int usn = BIT_MASK_GET(imm, 14, 14); |
db6dac32 | 1801 | |
43a6998b | 1802 | int n, num_operands; |
534a3d5c | 1803 | unsigned_4* last_operand_word = NULL; |
9614fb3c FCE |
1804 | |
1805 | /* catch all illegal UNPACK variants */ | |
1806 | if(vl == 3 && vn < 3) | |
1807 | { | |
1808 | pke_code_error(me, pkecode); | |
1809 | return; | |
1810 | } | |
fba9bfed | 1811 | |
43a6998b | 1812 | /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ |
9614fb3c | 1813 | if(cl >= addrwl) |
43a6998b FCE |
1814 | n = num; |
1815 | else | |
9614fb3c | 1816 | n = cl * (nummx / addrwl) + PKE_LIMIT(nummx % addrwl, cl); |
fd909089 | 1817 | num_operands = (31 + (32 >> vl) * (vn+1) * n)/32; /* round up to next word */ |
43a6998b FCE |
1818 | |
1819 | /* confirm that FIFO has enough words in it */ | |
534a3d5c | 1820 | if(num_operands > 0) |
9614fb3c | 1821 | last_operand_word = pke_pcrel_operand(me, num_operands); |
534a3d5c | 1822 | if(last_operand_word != NULL || num_operands == 0) |
43a6998b | 1823 | { |
534a3d5c FCE |
1824 | address_word vu_addr_base, vutrack_addr_base; |
1825 | address_word vu_addr_max_size; | |
1826 | int vector_num_out, vector_num_in; | |
43a6998b FCE |
1827 | |
1828 | /* "transferring" operand */ | |
1829 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); | |
1830 | ||
1831 | /* don't check whether VU is idle */ | |
db6dac32 FCE |
1832 | |
1833 | /* compute VU address base */ | |
43a6998b | 1834 | if(me->pke_number == 0) |
534a3d5c | 1835 | { |
d22ea5d0 | 1836 | vu_addr_base = VU0_MEM1_WINDOW_START; |
534a3d5c | 1837 | vu_addr_max_size = VU0_MEM1_SIZE; |
d22ea5d0 FCE |
1838 | vutrack_addr_base = VU0_MEM1_SRCADDR_START; |
1839 | r = 0; | |
534a3d5c | 1840 | } |
43a6998b FCE |
1841 | else |
1842 | { | |
d22ea5d0 | 1843 | vu_addr_base = VU1_MEM1_WINDOW_START; |
534a3d5c | 1844 | vu_addr_max_size = VU1_MEM1_SIZE; |
d22ea5d0 | 1845 | vutrack_addr_base = VU1_MEM1_SRCADDR_START; |
43a6998b | 1846 | } |
db6dac32 FCE |
1847 | |
1848 | /* set NUM */ | |
9614fb3c | 1849 | PKE_REG_MASK_SET(me, NUM, NUM, nummx); |
db6dac32 | 1850 | |
43a6998b | 1851 | /* transfer given number of vectors */ |
534a3d5c FCE |
1852 | vector_num_out = 0; /* output vector number being processed */ |
1853 | vector_num_in = 0; /* argument vector number being processed */ | |
db6dac32 | 1854 | do |
43a6998b FCE |
1855 | { |
1856 | quadword vu_old_data; | |
1857 | quadword vu_new_data; | |
1858 | quadword unpacked_data; | |
1859 | address_word vu_addr; | |
534a3d5c | 1860 | address_word vutrack_addr; |
db6dac32 | 1861 | unsigned_4 source_addr = 0; |
43a6998b | 1862 | int i; |
534a3d5c FCE |
1863 | int next_num; |
1864 | ||
db6dac32 | 1865 | /* decrement NUM */ |
534a3d5c FCE |
1866 | next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1; |
1867 | PKE_REG_MASK_SET(me, NUM, NUM, next_num); | |
1868 | ||
43a6998b FCE |
1869 | /* compute VU destination address, as bytes in R5900 memory */ |
1870 | if(cl >= wl) | |
1871 | { | |
1872 | /* map zero to max+1 */ | |
d22ea5d0 | 1873 | vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + |
fd909089 FCE |
1874 | (vector_num_out / addrwl) * cl + |
1875 | (vector_num_out % addrwl)); | |
43a6998b FCE |
1876 | } |
1877 | else | |
d22ea5d0 | 1878 | vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + |
d22ea5d0 | 1879 | vector_num_out); |
fd909089 FCE |
1880 | |
1881 | /* handle "R" double-buffering bit */ | |
1882 | if(r) | |
1883 | vu_addr += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS); | |
534a3d5c FCE |
1884 | |
1885 | /* check for vu_addr overflow */ | |
1886 | while(vu_addr >= vu_addr_base + vu_addr_max_size) | |
1887 | vu_addr -= vu_addr_max_size; | |
1888 | ||
1889 | /* compute address of tracking table entry */ | |
1890 | vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4; | |
db6dac32 | 1891 | |
f0bb94cd FCE |
1892 | /* read old VU data word at address; reverse words if needed */ |
1893 | { | |
1894 | unsigned_16 vu_old_badwords; | |
1895 | ASSERT(sizeof(vu_old_badwords) == 16); | |
1896 | PKE_MEM_READ(me, vu_addr, | |
1897 | &vu_old_badwords, 16); | |
1898 | vu_old_data[0] = * A4_16(& vu_old_badwords, 3); | |
1899 | vu_old_data[1] = * A4_16(& vu_old_badwords, 2); | |
1900 | vu_old_data[2] = * A4_16(& vu_old_badwords, 1); | |
1901 | vu_old_data[3] = * A4_16(& vu_old_badwords, 0); | |
1902 | } | |
653c2590 | 1903 | |
43a6998b FCE |
1904 | /* For cyclic unpack, next operand quadword may come from instruction stream |
1905 | or be zero. */ | |
9614fb3c FCE |
1906 | if((cl < addrwl) && |
1907 | (vector_num_out % addrwl) >= cl) | |
43a6998b FCE |
1908 | { |
1909 | /* clear operand - used only in a "indeterminate" state */ | |
1910 | for(i = 0; i < 4; i++) | |
1911 | unpacked_data[i] = 0; | |
1912 | } | |
1913 | else | |
1914 | { | |
db6dac32 | 1915 | /* compute packed vector dimensions */ |
9614fb3c | 1916 | int vectorbits = 0, unitbits = 0; |
db6dac32 FCE |
1917 | |
1918 | if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */ | |
43a6998b | 1919 | { |
db6dac32 FCE |
1920 | unitbits = (32 >> vl); |
1921 | vectorbits = unitbits * (vn+1); | |
1922 | } | |
1923 | else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */ | |
1924 | { | |
1925 | unitbits = 5; | |
1926 | vectorbits = 16; | |
1927 | } | |
1928 | else /* illegal unpack variant */ | |
1929 | { | |
9614fb3c FCE |
1930 | /* should have been caught at top of function */ |
1931 | ASSERT(0); | |
db6dac32 FCE |
1932 | } |
1933 | ||
1934 | /* loop over columns */ | |
1935 | for(i=0; i<=vn; i++) | |
1936 | { | |
1937 | unsigned_4 operand; | |
fba9bfed | 1938 | |
db6dac32 FCE |
1939 | /* offset in bits in current operand word */ |
1940 | int bitoffset = | |
534a3d5c | 1941 | (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */ |
43a6998b | 1942 | |
db6dac32 FCE |
1943 | /* last unit of V4_5 is only one bit wide */ |
1944 | if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */ | |
1945 | unitbits = 1; | |
1946 | ||
9614fb3c FCE |
1947 | /* confirm we're not reading more than we said we needed */ |
1948 | if(vector_num_in * vectorbits >= num_operands * 32) | |
1949 | { | |
1950 | /* this condition may be triggered by illegal | |
1951 | PKEcode / CYCLE combinations. */ | |
1952 | pke_code_error(me, pkecode); | |
1953 | /* XXX: this case needs to be better understood, | |
1954 | and detected at a better time. */ | |
1955 | return; | |
1956 | } | |
1957 | ||
db6dac32 | 1958 | /* fetch bitfield operand */ |
9614fb3c | 1959 | operand = pke_pcrel_operand_bits(me, bitoffset, unitbits, & source_addr); |
db6dac32 FCE |
1960 | |
1961 | /* selectively sign-extend; not for V4_5 1-bit value */ | |
653c2590 | 1962 | if(usn || unitbits == 1) |
db6dac32 | 1963 | unpacked_data[i] = operand; |
653c2590 FCE |
1964 | else |
1965 | unpacked_data[i] = SEXT32(operand, unitbits-1); | |
43a6998b | 1966 | } |
534a3d5c | 1967 | |
9614fb3c FCE |
1968 | /* set remaining top words in vector */ |
1969 | for(i=vn+1; i<4; i++) | |
1970 | { | |
1971 | if(vn == 0) /* S_{32,16,8}: copy lowest element */ | |
1972 | unpacked_data[i] = unpacked_data[0]; | |
1973 | else | |
1974 | unpacked_data[i] = 0; | |
1975 | } | |
fd909089 | 1976 | |
534a3d5c FCE |
1977 | /* consumed a vector from the PKE instruction stream */ |
1978 | vector_num_in ++; | |
db6dac32 | 1979 | } /* unpack word from instruction operand */ |
43a6998b | 1980 | |
9614fb3c FCE |
1981 | /* process STMOD register for accumulation operations */ |
1982 | switch(PKE_REG_MASK_GET(me, MODE, MDE)) | |
1983 | { | |
1984 | case PKE_MODE_ADDROW: /* add row registers to output data */ | |
b59e0b68 | 1985 | case PKE_MODE_ACCROW: /* same .. later conditionally accumulate */ |
9614fb3c FCE |
1986 | for(i=0; i<4; i++) |
1987 | /* exploit R0..R3 contiguity */ | |
1988 | unpacked_data[i] += me->regs[PKE_REG_R0 + i][0]; | |
1989 | break; | |
1990 | ||
9614fb3c FCE |
1991 | case PKE_MODE_INPUT: /* pass data through */ |
1992 | default: /* specified as undefined */ | |
1993 | ; | |
1994 | } | |
1995 | ||
db6dac32 | 1996 | /* compute replacement word */ |
43a6998b FCE |
1997 | if(m) /* use mask register? */ |
1998 | { | |
1999 | /* compute index into mask register for this word */ | |
b4d2f483 | 2000 | int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3); |
43a6998b | 2001 | |
534a3d5c | 2002 | for(i=0; i<4; i++) /* loop over columns */ |
43a6998b FCE |
2003 | { |
2004 | int mask_op = PKE_MASKREG_GET(me, mask_index, i); | |
2005 | unsigned_4* masked_value = NULL; | |
43a6998b FCE |
2006 | |
2007 | switch(mask_op) | |
2008 | { | |
2009 | case PKE_MASKREG_INPUT: | |
9614fb3c | 2010 | masked_value = & unpacked_data[i]; |
b59e0b68 FCE |
2011 | |
2012 | /* conditionally accumulate */ | |
2013 | if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) | |
2014 | me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; | |
2015 | ||
43a6998b FCE |
2016 | break; |
2017 | ||
2018 | case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ | |
2019 | masked_value = & me->regs[PKE_REG_R0 + i][0]; | |
2020 | break; | |
2021 | ||
2022 | case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ | |
534a3d5c | 2023 | masked_value = & me->regs[PKE_REG_C0 + mask_index][0]; |
43a6998b FCE |
2024 | break; |
2025 | ||
2026 | case PKE_MASKREG_NOTHING: | |
2027 | /* "write inhibit" by re-copying old data */ | |
2028 | masked_value = & vu_old_data[i]; | |
2029 | break; | |
2030 | ||
2031 | default: | |
2032 | ASSERT(0); | |
2033 | /* no other cases possible */ | |
2034 | } | |
2035 | ||
2036 | /* copy masked value for column */ | |
db6dac32 | 2037 | vu_new_data[i] = *masked_value; |
43a6998b | 2038 | } /* loop over columns */ |
db6dac32 | 2039 | } /* mask */ |
43a6998b FCE |
2040 | else |
2041 | { | |
2042 | /* no mask - just copy over entire unpacked quadword */ | |
2043 | memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); | |
b59e0b68 FCE |
2044 | |
2045 | /* conditionally store accumulated row results */ | |
2046 | if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) | |
2047 | for(i=0; i<4; i++) | |
2048 | me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; | |
43a6998b | 2049 | } |
43a6998b | 2050 | |
f0bb94cd FCE |
2051 | /* write new VU data word at address; reverse words if needed */ |
2052 | { | |
2053 | unsigned_16 vu_new_badwords; | |
2054 | * A4_16(& vu_new_badwords, 3) = vu_new_data[0]; | |
2055 | * A4_16(& vu_new_badwords, 2) = vu_new_data[1]; | |
2056 | * A4_16(& vu_new_badwords, 1) = vu_new_data[2]; | |
2057 | * A4_16(& vu_new_badwords, 0) = vu_new_data[3]; | |
2058 | ASSERT(sizeof(vu_new_badwords) == 16); | |
2059 | PKE_MEM_WRITE(me, vu_addr, | |
2060 | &vu_new_badwords, 16); | |
2061 | } | |
2062 | ||
2063 | /* write tracking address */ | |
534a3d5c FCE |
2064 | ASSERT(sizeof(unsigned_4) == 4); |
2065 | PKE_MEM_WRITE(me, vutrack_addr, | |
2066 | & source_addr, | |
2067 | 4); | |
43a6998b FCE |
2068 | |
2069 | /* next vector please */ | |
534a3d5c | 2070 | vector_num_out ++; |
43a6998b | 2071 | } /* vector transfer loop */ |
db6dac32 | 2072 | while(PKE_REG_MASK_GET(me, NUM, NUM) > 0); |
43a6998b | 2073 | |
9614fb3c FCE |
2074 | /* confirm we've written as many vectors as told */ |
2075 | ASSERT(nummx == vector_num_out); | |
2076 | ||
43a6998b FCE |
2077 | /* done */ |
2078 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
e2306992 | 2079 | pke_pc_advance(me, 1 + num_operands); |
43a6998b FCE |
2080 | } /* PKE FIFO full enough */ |
2081 | else | |
2082 | { | |
2083 | /* need to wait for another word */ | |
2084 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); | |
2085 | /* retry this instruction next clock */ | |
2086 | } | |
2087 | } | |
2088 | ||
2089 | ||
2090 | void | |
2091 | pke_code_error(struct pke_device* me, unsigned_4 pkecode) | |
2092 | { | |
9614fb3c FCE |
2093 | /* set ER1 flag in STAT register */ |
2094 | PKE_REG_MASK_SET(me, STAT, ER1, 1); | |
2095 | ||
fd909089 FCE |
2096 | if(! PKE_REG_MASK_GET(me, ERR, ME1)) |
2097 | { | |
9614fb3c | 2098 | pke_begin_interrupt_stall(me); |
fd909089 FCE |
2099 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); |
2100 | } | |
2101 | else | |
2102 | { | |
2103 | PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); | |
2104 | } | |
2105 | ||
43a6998b | 2106 | /* advance over faulty word */ |
43a6998b | 2107 | pke_pc_advance(me, 1); |
fba9bfed | 2108 | } |