X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=sim%2Fmips%2Fsky-pke.c;h=ce5c478a595cb03e4d24397ccbce4134a1a0a6a4;hb=ebcfd86a2ee6c08419b951d8176febaf683e9726;hp=94b650257b25805253fc905e2cb96981690123ca;hpb=fba9bfed2dd8e2fd7e73a2c2c5a33954a3d1cb78;p=deliverable%2Fbinutils-gdb.git diff --git a/sim/mips/sky-pke.c b/sim/mips/sky-pke.c index 94b650257b..ce5c478a59 100644 --- a/sim/mips/sky-pke.c +++ b/sim/mips/sky-pke.c @@ -1,17 +1,26 @@ /* Copyright (C) 1998, Cygnus Solutions */ + +#include "config.h" + #include +#include "sim-main.h" +#include "sim-bits.h" +#include "sim-assert.h" #include "sky-pke.h" #include "sky-dma.h" -#include "sim-assert.h" -#include "sky-vu0.h" -#include "sky-vu1.h" +#include "sky-vu.h" #include "sky-gpuif.h" +#include "sky-device.h" -/* Imported functions */ - -void device_error (device *me, char* message); /* device.c */ +#ifdef HAVE_STRING_H +#include +#else +#ifdef HAVE_STRINGS_H +#include +#endif +#endif /* Internal function declarations */ @@ -20,13 +29,47 @@ static int pke_io_read_buffer(device*, void*, int, address_word, unsigned, sim_cpu*, sim_cia); static int pke_io_write_buffer(device*, const void*, int, address_word, unsigned, sim_cpu*, sim_cia); -static void pke_issue(struct pke_device*); +static void pke_reset(struct pke_device*); +static void pke_issue(SIM_DESC, struct pke_device*); static void pke_pc_advance(struct pke_device*, int num_words); -static unsigned_4* pke_pc_operand(struct pke_device*, int word_num); -static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num); -static int pke_track_write(struct pke_device*, const void* src, int len, - address_word dest, unsigned_4 sourceaddr); +static struct fifo_quadword* pke_pcrel_fifo(struct pke_device*, int operand_num, + unsigned_4** operand); +static unsigned_4* pke_pcrel_operand(struct pke_device*, int operand_num); +static unsigned_4 pke_pcrel_operand_bits(struct pke_device*, int bit_offset, + int bit_width, unsigned_4* sourceaddr); static void pke_attach(SIM_DESC sd, struct pke_device* me); +enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 }; +static int pke_check_stall(struct pke_device* me, enum pke_check_target what); +static void pke_flip_dbf(struct pke_device* me); +static void pke_begin_interrupt_stall(struct pke_device* me); +/* PKEcode handlers */ +static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_offset(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_base(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_itop(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_stmod(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_flushe(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_flush(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_flusha(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_stmask(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_strow(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_stcol(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_mpg(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode); +static void pke_code_error(struct pke_device* me, unsigned_4 pkecode); +unsigned_4 pke_fifo_flush(struct pke_fifo*); +void pke_fifo_reset(struct pke_fifo*); +struct fifo_quadword* pke_fifo_fit(struct pke_fifo*); +struct fifo_quadword* pke_fifo_access(struct pke_fifo*, unsigned_4 qwnum); +void pke_fifo_old(struct pke_fifo*, unsigned_4 qwnum); @@ -37,7 +80,10 @@ struct pke_device pke0_device = { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ 0, 0, /* ID, flags */ {}, /* regs */ - NULL, 0, 0, NULL, /* FIFO */ + {}, 0, /* FIFO write buffer */ + { NULL, 0, 0, 0 }, /* FIFO */ + NULL, /* FIFO trace file */ + -1, -1, 0, 0, 0, /* invalid FIFO cache */ 0, 0 /* pc */ }; @@ -47,7 +93,10 @@ struct pke_device pke1_device = { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ 1, 0, /* ID, flags */ {}, /* regs */ - NULL, 0, 0, NULL, /* FIFO */ + {}, 0, /* FIFO write buffer */ + { NULL, 0, 0, 0 }, /* FIFO */ + NULL, /* FIFO trace file */ + -1, -1, 0, 0, 0, /* invalid FIFO cache */ 0, 0 /* pc */ }; @@ -62,12 +111,14 @@ void pke0_attach(SIM_DESC sd) { pke_attach(sd, & pke0_device); + pke_reset(& pke0_device); } void pke1_attach(SIM_DESC sd) { pke_attach(sd, & pke1_device); + pke_reset(& pke1_device); } @@ -75,15 +126,15 @@ pke1_attach(SIM_DESC sd) /* Issue a PKE instruction if possible */ void -pke0_issue() +pke0_issue(SIM_DESC sd) { - pke_issue(& pke0_device); + pke_issue(sd, & pke0_device); } void -pke1_issue() +pke1_issue(SIM_DESC sd) { - pke_issue(& pke0_device); + pke_issue(sd, & pke1_device); } @@ -97,42 +148,117 @@ void pke_attach(SIM_DESC sd, struct pke_device* me) { /* register file */ - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, + sim_core_attach (sd, NULL, 0, access_read_write, 0, (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START, PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, 0 /*modulo*/, - (device*) &pke0_device, + (device*) me, NULL /*buffer*/); /* FIFO port */ - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, + sim_core_attach (sd, NULL, 0, access_read_write, 0, (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR, sizeof(quadword) /*nr_bytes*/, 0 /*modulo*/, - (device*) &pke1_device, + (device*) me, + NULL /*buffer*/); + + /* VU MEM0 tracking table */ + sim_core_attach (sd, NULL, 0, access_read_write, 0, + ((me->pke_number == 0) ? VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START), + ((me->pke_number == 0) ? VU0_MEM0_SIZE : VU1_MEM0_SIZE) / 2, + 0 /*modulo*/, + NULL, NULL /*buffer*/); - /* source-addr tracking word */ - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, - (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, - sizeof(unsigned_4) /*nr_bytes*/, + /* VU MEM1 tracking table */ + sim_core_attach (sd, NULL, 0, access_read_write, 0, + ((me->pke_number == 0) ? VU0_MEM1_SRCADDR_START : VU1_MEM1_SRCADDR_START), + ((me->pke_number == 0) ? VU0_MEM1_SIZE : VU1_MEM1_SIZE) / 4, 0 /*modulo*/, - NULL, - zalloc(sizeof(unsigned_4)) /*buffer*/); + NULL, + NULL /*buffer*/); + + + /* attach to trace file if appropriate */ + { + char trace_envvar[80]; + char* trace_filename = NULL; + sprintf(trace_envvar, "VIF%d_TRACE_FILE", me->pke_number); + trace_filename = getenv(trace_envvar); + if(trace_filename != NULL) + { + me->fifo_trace_file = fopen(trace_filename, "w"); + if(me->fifo_trace_file == NULL) + perror("VIF FIFO trace error on fopen"); + else + setvbuf(me->fifo_trace_file, NULL, _IOLBF, 0); + } + } +} + +/* Read PKE Pseudo-PC into buf in target order */ +int +read_pke_pc (struct pke_device *me, void *buf) +{ + *((int *) buf) = H2T_4( me->fifo_pc ); + return 4; } +/* Read PKE reg into buf in target order */ +int +read_pke_reg (struct pke_device *me, int reg_num, void *buf) +{ + /* handle reads to individual registers; clear `readable' on error */ + switch (reg_num) + { + /* handle common case of register reading, side-effect free */ + /* PKE1-only registers*/ + case PKE_REG_BASE: + case PKE_REG_OFST: + case PKE_REG_TOPS: + case PKE_REG_TOP: + case PKE_REG_DBF: + if (me->pke_number == 0) + { + *((int *) buf) = 0; + break; + } + /* fall through */ + + /* PKE0 & PKE1 common registers*/ + case PKE_REG_STAT: + case PKE_REG_ERR: + case PKE_REG_MARK: + case PKE_REG_CYCLE: + case PKE_REG_MODE: + case PKE_REG_NUM: + case PKE_REG_MASK: + case PKE_REG_CODE: + case PKE_REG_ITOPS: + case PKE_REG_ITOP: + case PKE_REG_R0: + case PKE_REG_R1: + case PKE_REG_R2: + case PKE_REG_R3: + case PKE_REG_C0: + case PKE_REG_C1: + case PKE_REG_C2: + case PKE_REG_C3: + *((int *) buf) = H2T_4(me->regs[reg_num][0]); + break; + + /* handle common case of write-only registers */ + case PKE_REG_FBRST: + *((int *) buf) = 0; + break; + + default: + ASSERT(0); /* tests above should prevent this possibility */ + } + + return 4; +} /* Handle a PKE read; return no. of bytes read */ @@ -167,70 +293,17 @@ pke_io_read_buffer(device *me_, /* register bank */ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ - int readable = 1; quadword result; /* clear result */ result[0] = result[1] = result[2] = result[3] = 0; - /* handle reads to individual registers; clear `readable' on error */ - switch(reg_num) - { - /* handle common case of register reading, side-effect free */ - /* PKE1-only registers*/ - case PKE_REG_BASE: - case PKE_REG_OFST: - case PKE_REG_TOPS: - case PKE_REG_TOP: - case PKE_REG_DBF: - if(me->pke_number == 0) - readable = 0; - /* fall through */ - /* PKE0 & PKE1 common registers*/ - case PKE_REG_STAT: - case PKE_REG_ERR: - case PKE_REG_MARK: - case PKE_REG_CYCLE: - case PKE_REG_MODE: - case PKE_REG_NUM: - case PKE_REG_MASK: - case PKE_REG_CODE: - case PKE_REG_ITOPS: - case PKE_REG_ITOP: - case PKE_REG_R0: - case PKE_REG_R1: - case PKE_REG_R2: - case PKE_REG_R3: - case PKE_REG_C0: - case PKE_REG_C1: - case PKE_REG_C2: - case PKE_REG_C3: - result[0] = me->regs[reg_num][0]; - break; - - /* handle common case of write-only registers */ - case PKE_REG_FBRST: - readable = 0; - break; - - default: - ASSERT(0); /* test above should prevent this possibility */ - } + read_pke_reg (me, reg_num, result); /* perform transfer & return */ - if(readable) - { - /* copy the bits */ - memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); - /* okay */ - return nr_bytes; - } - else - { - /* error */ - return 0; - } + memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); + return nr_bytes; /* NOTREACHED */ } else if(addr >= my_fifo_addr && @@ -247,8 +320,102 @@ pke_io_read_buffer(device *me_, return 0; } +/* Write PKE reg from buf, which is in target order */ +int +write_pke_reg (struct pke_device *me, int reg_num, const void *buf) +{ + int writeable = 1; + /* make words host-endian */ + unsigned_4 input = T2H_4( *((unsigned_4 *) buf) ); + + /* handle writes to individual registers; clear `writeable' on error */ + switch (reg_num) + { + case PKE_REG_FBRST: + /* Order these tests from least to most overriding, in case + multiple bits are set. */ + if(BIT_MASK_GET(input, PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E)) + { + /* clear a bunch of status bits */ + PKE_REG_MASK_SET(me, STAT, PSS, 0); + PKE_REG_MASK_SET(me, STAT, PFS, 0); + PKE_REG_MASK_SET(me, STAT, PIS, 0); + PKE_REG_MASK_SET(me, STAT, INT, 0); + PKE_REG_MASK_SET(me, STAT, ER0, 0); + PKE_REG_MASK_SET(me, STAT, ER1, 0); + me->flags &= ~PKE_FLAG_PENDING_PSS; + /* will allow resumption of possible stalled instruction */ + } + if(BIT_MASK_GET(input, PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E)) + { + me->flags |= PKE_FLAG_PENDING_PSS; + } + if(BIT_MASK_GET(input, PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E)) + { + PKE_REG_MASK_SET(me, STAT, PFS, 1); + } + if(BIT_MASK_GET(input, PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E)) + { + pke_reset(me); + } + break; + + case PKE_REG_ERR: + /* copy bottom three bits */ + BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input, 0, 2)); + break; + + case PKE_REG_MARK: + /* copy bottom sixteen bits */ + PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input, 0, 15)); + /* reset MRK bit in STAT */ + PKE_REG_MASK_SET(me, STAT, MRK, 0); + break; + + /* handle common case of read-only registers */ + /* PKE1-only registers - not really necessary to handle separately */ + case PKE_REG_BASE: + case PKE_REG_OFST: + case PKE_REG_TOPS: + case PKE_REG_TOP: + case PKE_REG_DBF: + if(me->pke_number == 0) + writeable = 0; + /* fall through */ + /* PKE0 & PKE1 common registers*/ + case PKE_REG_STAT: + /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ + case PKE_REG_CYCLE: + case PKE_REG_MODE: + case PKE_REG_NUM: + case PKE_REG_MASK: + case PKE_REG_CODE: + case PKE_REG_ITOPS: + case PKE_REG_ITOP: + case PKE_REG_R0: + case PKE_REG_R1: + case PKE_REG_R2: + case PKE_REG_R3: + case PKE_REG_C0: + case PKE_REG_C1: + case PKE_REG_C2: + case PKE_REG_C3: + writeable = 0; + break; + + default: + ASSERT(0); /* test above should prevent this possibility */ + } + + /* perform return */ + if(! writeable) + { + return 0; /* error */ + } -/* Handle a PKE read; return no. of bytes written */ + return 4; +} +/* Handle a PKE write; return no. of bytes written */ int pke_io_write_buffer(device *me_, @@ -280,7 +447,6 @@ pke_io_write_buffer(device *me_, /* register bank */ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ - int writeable = 1; quadword input; /* clear input */ @@ -289,101 +455,8 @@ pke_io_write_buffer(device *me_, /* write user-given bytes into input */ memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); - /* handle writes to individual registers; clear `writeable' on error */ - switch(reg_num) - { - case PKE_REG_FBRST: - /* XXX: order of evaluation? STP && STC ?? */ - if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */ - { - /* clear FIFO: also prevents re-execution attempt of - possible stalled instruction */ - me->fifo_num_elements = me->fifo_pc; - /* clear registers */ - memset(me->regs, 0, sizeof(me->regs)); - me->flags = 0; - me->qw_pc = 0; - } - if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */ - { - PKE_REG_MASK_SET(me, STAT, PFS, 1); - } - if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */ - { - /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */ - PKE_REG_MASK_SET(me, STAT, PSS, 1); - } - if(BIT_MASK_GET(input[0], 2, 2)) /* STC bit */ - { - /* clear a bunch of status bits */ - PKE_REG_MASK_SET(me, STAT, PSS, 0); - PKE_REG_MASK_SET(me, STAT, PFS, 0); - PKE_REG_MASK_SET(me, STAT, PIS, 0); - PKE_REG_MASK_SET(me, STAT, INT, 0); - PKE_REG_MASK_SET(me, STAT, ER0, 0); - PKE_REG_MASK_SET(me, STAT, ER1, 0); - /* will allow resumption of possible stalled instruction */ - } - break; - - case PKE_REG_ERR: - /* copy bottom three bits */ - BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2)); - break; - - case PKE_REG_MARK: - /* copy bottom sixteen bits */ - PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15)); - /* reset MRK bit in STAT */ - PKE_REG_MASK_SET(me, STAT, MRK, 0); - break; - - /* handle common case of read-only registers */ - /* PKE1-only registers - not really necessary to handle separately */ - case PKE_REG_BASE: - case PKE_REG_OFST: - case PKE_REG_TOPS: - case PKE_REG_TOP: - case PKE_REG_DBF: - if(me->pke_number == 0) - writeable = 0; - /* fall through */ - /* PKE0 & PKE1 common registers*/ - case PKE_REG_STAT: - /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ - case PKE_REG_CYCLE: - case PKE_REG_MODE: - case PKE_REG_NUM: - case PKE_REG_MASK: - case PKE_REG_CODE: - case PKE_REG_ITOPS: - case PKE_REG_ITOP: - case PKE_REG_R0: - case PKE_REG_R1: - case PKE_REG_R2: - case PKE_REG_R3: - case PKE_REG_C0: - case PKE_REG_C1: - case PKE_REG_C2: - case PKE_REG_C3: - writeable = 0; - break; - - default: - ASSERT(0); /* test above should prevent this possibility */ - } - - /* perform return */ - if(writeable) - { - /* okay */ - return nr_bytes; - } - else - { - /* error */ - return 0; - } + write_pke_reg (me, reg_num, input); + return nr_bytes; /* NOTREACHED */ } @@ -392,43 +465,51 @@ pke_io_write_buffer(device *me_, { /* FIFO */ struct fifo_quadword* fqw; - - /* assert transfer size == 128 bits */ - if(nr_bytes != sizeof(quadword)) - return 0; - - /* ensure FIFO has enough elements */ - if(me->fifo_num_elements == me->fifo_buffer_size) + int fifo_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside fifo quadword */ + unsigned_4 dma_tag_present = 0; + int i; + + /* collect potentially-partial quadword in write buffer; LE byte order */ + memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes); + /* mark bytes written */ + for(i = fifo_byte; i < fifo_byte + nr_bytes; i++) + BIT_MASK_SET(me->fifo_qw_done, i, i, 1); + + /* return if quadword not quite written yet */ + if(BIT_MASK_GET(me->fifo_qw_done, 0, sizeof(quadword)-1) != + BIT_MASK_BTW(0, sizeof(quadword)-1)) + return nr_bytes; + + /* all done - process quadword after clearing flag */ + BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0); + + /* allocate required address in FIFO */ + fqw = pke_fifo_fit(& me->fifo); + ASSERT(fqw != NULL); + + /* fill in unclassified FIFO quadword data in host byte order */ + fqw->word_class[0] = fqw->word_class[1] = + fqw->word_class[2] = fqw->word_class[3] = wc_unknown; + fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]); + fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]); + fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]); + fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]); + + /* read DMAC-supplied indicators */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR), + & fqw->source_address, /* converted to host-endian */ + 4); + PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG), + & dma_tag_present, + 4); + + if(dma_tag_present) { - /* time to grow */ - int new_fifo_buffer_size = me->fifo_buffer_size + 20; - void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(quadword)); - - if(ptr == NULL) - { - /* oops, cannot enlarge FIFO any more */ - device_error(me_, "Cannot enlarge FIFO buffer\n"); - return 0; - } - - me->fifo_buffer_size = new_fifo_buffer_size; + /* lower two words are DMA tags */ + fqw->word_class[0] = fqw->word_class[1] = wc_dma; } - /* add new quadword at end of FIFO */ - fqw = & me->fifo[me->fifo_num_elements]; - memcpy((void*) fqw->data, src, nr_bytes); - sim_read(CPU_STATE(cpu), - (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_SRCADDR : DMA_CHANNEL1_SRCADDR), - (void*) & fqw->source_address, - sizeof(address_word)); - sim_read(CPU_STATE(cpu), - (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG), - (void*) & fqw->dma_tag_present, - sizeof(unsigned_4)); - /* XXX: check RC */ - - me->fifo_num_elements++; - /* set FQC to "1" as FIFO is now not empty */ PKE_REG_MASK_SET(me, STAT, FQC, 1); @@ -442,961 +523,1603 @@ pke_io_write_buffer(device *me_, +/* Reset the PKE */ +void +pke_reset(struct pke_device* me) +{ + /* advance PC over last quadword in FIFO; keep previous FIFO history */ + me->fifo_pc = pke_fifo_flush(& me->fifo); + me->qw_pc = 0; + /* clear registers, flag, other state */ + memset(me->regs, 0, sizeof(me->regs)); + me->fifo_qw_done = 0; + me->flags = 0; +} + + + /* Issue & swallow next PKE opcode if possible/available */ void -pke_issue(struct pke_device* me) +pke_issue(SIM_DESC sd, struct pke_device* me) { struct fifo_quadword* fqw; unsigned_4 fw; - unsigned_4 cmd, intr, num; - unsigned_4 imm; - int next_pps_state; /* PPS after this instruction issue attempt */ - - /* 1 -- test go / no-go for PKE execution */ + unsigned_4 cmd, intr; - /* check for stall/halt control bits */ - /* XXX: What is the PEW bit for? */ - if(PKE_REG_MASK_GET(me, STAT, PSS) || - PKE_REG_MASK_GET(me, STAT, PFS) || - /* maskable stall controls: ER0, ER1, PIS */ - (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) || - (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) || - (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII))) - { - /* XXX */ - } - /* XXX: handle PSS by *skipping* instruction? */ + /* 1 -- fetch PKE instruction */ /* confirm availability of new quadword of PKE instructions */ - if(me->fifo_num_elements <= me->fifo_pc) + fqw = pke_fifo_access(& me->fifo, me->fifo_pc); + if(fqw == NULL) return; + /* skip over DMA tag, if present */ + pke_pc_advance(me, 0); + /* note: this can only change qw_pc from 0 to 2 and will not + invalidate fqw */ + + /* "fetch" instruction quadword and word */ + fw = fqw->data[me->qw_pc]; + + /* store word in PKECODE register */ + me->regs[PKE_REG_CODE][0] = fw; - /* 2 -- fetch PKE instruction */ - /* "fetch" instruction quadword */ - fqw = & me->fifo[me->fifo_pc]; + /* 2 -- test go / no-go for PKE execution */ - /* skip over DMA tags, if present */ - if((fqw->dma_tag_present != 0) && (me->qw_pc < 2)) + /* switch on STAT:PSS if PSS-pending and in idle state */ + if((PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) && + (me->flags & PKE_FLAG_PENDING_PSS) != 0) { - ASSERT(me->qw_pc == 0); - /* XXX: check validity of DMA tag; if bad, set ER0 flag */ - me->qw_pc = 2; + me->flags &= ~PKE_FLAG_PENDING_PSS; + PKE_REG_MASK_SET(me, STAT, PSS, 1); } - - /* "fetch" instruction word */ - fw = fqw->data[me->qw_pc]; - /* store it in PKECODE register */ - me->regs[PKE_REG_CODE][0] = fw; + /* check for stall/halt control bits */ + if(PKE_REG_MASK_GET(me, STAT, PFS) || + PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */ + /* PEW bit not a reason to keep stalling - it's just an indication, re-computed below */ + /* PGW bit not a reason to keep stalling - it's just an indication, re-computed below */ + /* ER0/ER1 not a reason to keep stalling - it's just an indication */ + PKE_REG_MASK_GET(me, STAT, PIS)) + { + /* (still) stalled */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ + return; + } /* 3 -- decode PKE instruction */ - /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0], - so op-code is in top byte. */ + /* decoding */ + if(PKE_REG_MASK_GET(me, STAT, PPS) == PKE_REG_STAT_PPS_IDLE) + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); + + /* Extract relevant bits from PKEcode */ intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E); cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); - num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); - imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* handle interrupts */ if(intr) { - /* set INT flag in STAT register */ - PKE_REG_MASK_SET(me, STAT, INT, 1); - /* XXX: send interrupt to R5900? */ + /* are we resuming an interrupt-stalled instruction? */ + if(me->flags & PKE_FLAG_INT_NOLOOP) + { + /* clear loop-prevention flag */ + me->flags &= ~PKE_FLAG_INT_NOLOOP; + + /* fall through to decode & execute */ + /* The pke_code_* functions should not check the MSB in the + pkecode. */ + } + else /* new interrupt-flagged instruction */ + { + /* set INT flag in STAT register */ + PKE_REG_MASK_SET(me, STAT, INT, 1); + /* set loop-prevention flag */ + me->flags |= PKE_FLAG_INT_NOLOOP; + + /* set PIS if stall not masked */ + if(!PKE_REG_MASK_GET(me, ERR, MII)) + pke_begin_interrupt_stall(me); + + /* suspend this instruction unless it's PKEMARK */ + if(!IS_PKE_CMD(cmd, PKEMARK)) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + return; + } + else + { + ; /* fall through to decode & execute */ + } + } } - /* decoding */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); - next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */ - /* decode */ + /* decode & execute */ if(IS_PKE_CMD(cmd, PKENOP)) - { - /* no work required, yey */ - pke_pc_advance(me, 1); - } + pke_code_nop(me, fw); else if(IS_PKE_CMD(cmd, STCYCL)) - { - /* copy immediate value into CYCLE reg */ - me->regs[PKE_REG_CYCLE][0] = imm; - pke_pc_advance(me, 1); - } + pke_code_stcycl(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET)) - { - /* copy 10 bits to OFFSET field */ - PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); - /* clear DBF bit */ - PKE_REG_MASK_SET(me, DBF, DF, 0); - /* clear other DBF bit */ - PKE_REG_MASK_SET(me, STAT, DBF, 0); - /* set TOPS = BASE */ - PKE_REG_MASK_SET(me, TOPS, TOPS, - PKE_REG_MASK_GET(me, BASE, BASE)); - pke_pc_advance(me, 1); - } + pke_code_offset(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE)) - { - /* copy 10 bits to BASE field */ - PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); - /* clear DBF bit */ - PKE_REG_MASK_SET(me, DBF, DF, 0); - /* clear other DBF bit */ - PKE_REG_MASK_SET(me, STAT, DBF, 0); - /* set TOPS = BASE */ - PKE_REG_MASK_SET(me, TOPS, TOPS, - PKE_REG_MASK_GET(me, BASE, BASE)); - pke_pc_advance(me, 1); - } + pke_code_base(me, fw); else if(IS_PKE_CMD(cmd, ITOP)) - { - /* copy 10 bits to ITOPS field */ - PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); - pke_pc_advance(me, 1); - } + pke_code_itop(me, fw); else if(IS_PKE_CMD(cmd, STMOD)) - { - /* copy 2 bits to MODE register */ - PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); - pke_pc_advance(me, 1); - } - else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */ - { - /* XXX: what to do with this? DMA control register? */ - pke_pc_advance(me, 1); - } + pke_code_stmod(me, fw); + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) + pke_code_mskpath3(me, fw); else if(IS_PKE_CMD(cmd, PKEMARK)) - { - /* copy 16 bits to MARK register */ - PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); - /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ - PKE_REG_MASK_SET(me, STAT, MRK, 1); - pke_pc_advance(me, 1); - } + pke_code_pkemark(me, fw); else if(IS_PKE_CMD(cmd, FLUSHE)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } + pke_code_flushe(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VGW bit is clear, i.e., PATH1 is idle */ - /* simulator design implies PATH2 is always "idle" */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && - BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && - 1 /* PATH2 always idle */) - { - /* VU idle */ - /* PATH1 idle */ - /* PATH2 idle */ - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* GPUIF busy */ - /* retry this instruction next clock */ - } - } + pke_code_flush(me, fw); else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VGW bit is clear, i.e., PATH1 is idle */ - /* simulator design implies PATH2 is always "idle" */ - /* XXX: simulator design implies PATH3 is always "idle" */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && - BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && - 1 /* PATH2 always idle */ && - 1 /* PATH3 always idle */) - { - /* VU idle */ - /* PATH1 idle */ - /* PATH2 idle */ - /* PATH3 idle */ - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* GPUIF busy */ - /* retry this instruction next clock */ - } - } + pke_code_flusha(me, fw); else if(IS_PKE_CMD(cmd, PKEMSCAL)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - unsigned_4 vu_pc; + pke_code_pkemscal(me, fw); + else if(IS_PKE_CMD(cmd, PKEMSCNT)) + pke_code_pkemscnt(me, fw); + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) + pke_code_pkemscalf(me, fw); + else if(IS_PKE_CMD(cmd, STMASK)) + pke_code_stmask(me, fw); + else if(IS_PKE_CMD(cmd, STROW)) + pke_code_strow(me, fw); + else if(IS_PKE_CMD(cmd, STCOL)) + pke_code_stcol(me, fw); + else if(IS_PKE_CMD(cmd, MPG)) + pke_code_mpg(me, fw); + else if(IS_PKE_CMD(cmd, DIRECT)) + pke_code_direct(me, fw); + else if(IS_PKE_CMD(cmd, DIRECTHL)) + pke_code_directhl(me, fw); + else if(IS_PKE_CMD(cmd, UNPACK)) + pke_code_unpack(me, fw); + /* ... no other commands ... */ + else + pke_code_error(me, fw); +} - /* perform PKE1-unique processing for microprogram calls */ - if(me->pke_number == 1) - { - /* flip DBF */ - PKE_REG_MASK_SET(me, DBF, DF, - PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); - PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); - /* compute new TOPS */ - PKE_REG_MASK_SET(me, TOPS, TOPS, - (PKE_REG_MASK_GET(me, BASE, BASE) + - (PKE_REG_MASK_GET(me, DBF, DF) * - PKE_REG_MASK_GET(me, OFST, OFFSET)))); - /* compute new ITOP and TOP */ - PKE_REG_MASK_SET(me, ITOP, ITOP, - PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - PKE_REG_MASK_SET(me, TOP, TOP, - PKE_REG_MASK_GET(me, TOPS, TOPS)); - } - /* compute new PC */ - vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ - /* write new PC; callback function gets VU running */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, PKEMSCNT)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - unsigned_4 vu_pc; - /* flip DBF etc. for PKE1 */ - if(me->pke_number == 1) - { - PKE_REG_MASK_SET(me, DBF, DF, - PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); - PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); - PKE_REG_MASK_SET(me, TOPS, TOPS, - (PKE_REG_MASK_GET(me, BASE, BASE) + - (PKE_REG_MASK_GET(me, DBF, DF) * - PKE_REG_MASK_GET(me, OFST, OFFSET)))); - PKE_REG_MASK_SET(me, ITOP, ITOP, - PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - PKE_REG_MASK_SET(me, TOP, TOP, - PKE_REG_MASK_GET(me, TOPS, TOPS)); - } +/* Clear out contents of FIFO; act as if it was empty. Return PC + pointing to one-past-last word. */ - /* read old PC */ - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* rewrite its PC; callback function gets VU running */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) - { - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VGW bit is clear, i.e., PATH1 is idle */ - /* simulator design implies PATH2 is always "idle" */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && - BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && - 1 /* PATH2 always idle */) - { - /* VU idle */ - /* PATH1 idle */ - /* PATH2 idle */ - unsigned_4 vu_pc; +unsigned_4 +pke_fifo_flush(struct pke_fifo* fifo) +{ + /* don't modify any state! */ + return fifo->origin + fifo->next; +} - /* flip DBF etc. for PKE1 */ - if(me->pke_number == 1) - { - PKE_REG_MASK_SET(me, DBF, DF, - PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); - PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); - PKE_REG_MASK_SET(me, TOPS, TOPS, - (PKE_REG_MASK_GET(me, BASE, BASE) + - (PKE_REG_MASK_GET(me, DBF, DF) * - PKE_REG_MASK_GET(me, OFST, OFFSET)))); - PKE_REG_MASK_SET(me, ITOP, ITOP, - PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - PKE_REG_MASK_SET(me, TOP, TOP, - PKE_REG_MASK_GET(me, TOPS, TOPS)); - } - /* compute new PC */ - vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ - /* write new PC; callback function gets VU running */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), - (void*) & vu_pc, - sizeof(unsigned_4)); - /* advance PC */ - pke_pc_advance(me, 1); - } - else - { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, STMASK)) - { - /* check that FIFO has one more word for STMASK operand */ - unsigned_4* mask; - mask = pke_pc_operand(me, 1); - if(mask != NULL) - { - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); - /* fill the register */ - PKE_REG_MASK_SET(me, MASK, MASK, *mask); - /* advance PC */ - pke_pc_advance(me, 2); - } - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } - } - else if(IS_PKE_CMD(cmd, STROW)) +/* Clear out contents of FIFO; make it really empty. */ + +void +pke_fifo_reset(struct pke_fifo* fifo) +{ + int i; + + /* clear fifo quadwords */ + for(i=0; inext; i++) { - /* check that FIFO has four more words for STROW operand */ - unsigned_4* last_op; + zfree(fifo->quadwords[i]); + fifo->quadwords[i] = NULL; + } - last_op = pke_pc_operand(me, 4); - if(last_op != NULL) - { - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + /* reset pointers */ + fifo->origin = 0; + fifo->next = 0; +} - /* copy ROW registers: must all exist if 4th operand exists */ - me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1); - me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2); - me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3); - me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4); - /* advance PC */ - pke_pc_advance(me, 5); - } - else + +/* Make space for the next quadword in the FIFO. Allocate/enlarge + FIFO pointer block if necessary. Return a pointer to it. */ + +struct fifo_quadword* +pke_fifo_fit(struct pke_fifo* fifo) +{ + struct fifo_quadword* fqw; + + /* out of space on quadword pointer array? */ + if(fifo->next == fifo->length) /* also triggered before fifo->quadwords allocated */ + { + struct fifo_quadword** new_qw; + unsigned_4 new_length = fifo->length + PKE_FIFO_GROW_SIZE; + + /* allocate new pointer block */ + new_qw = zalloc(new_length * sizeof(struct fifo_quadword*)); + ASSERT(new_qw != NULL); + + /* copy over old contents, if any */ + if(fifo->quadwords != NULL) { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ + /* copy over old pointers to beginning of new block */ + memcpy(new_qw, fifo->quadwords, + fifo->length * sizeof(struct fifo_quadword*)); + + /* free old block */ + zfree(fifo->quadwords); } + + /* replace pointers & counts */ + fifo->quadwords = new_qw; + fifo->length = new_length; } - else if(IS_PKE_CMD(cmd, STCOL)) - { - /* check that FIFO has four more words for STCOL operand */ - unsigned_4* last_op; - last_op = pke_pc_operand(me, 4); - if(last_op != NULL) - { - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + /* sanity check */ + ASSERT(fifo->quadwords != NULL); - /* copy COL registers: must all exist if 4th operand exists */ - me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1); - me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2); - me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3); - me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4); + /* allocate new quadword from heap */ + fqw = zalloc(sizeof(struct fifo_quadword)); + ASSERT(fqw != NULL); - /* advance PC */ - pke_pc_advance(me, 5); - } - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } + /* push quadword onto fifo */ + fifo->quadwords[fifo->next] = fqw; + fifo->next++; + return fqw; +} + + + +/* Return a pointer to the FIFO quadword with given absolute index, or + NULL if it is out of range */ + +struct fifo_quadword* +pke_fifo_access(struct pke_fifo* fifo, unsigned_4 qwnum) +{ + struct fifo_quadword* fqw; + + if((qwnum < fifo->origin) || /* before history */ + (qwnum >= fifo->origin + fifo->next)) /* after last available quadword */ + fqw = NULL; + else + { + ASSERT(fifo->quadwords != NULL); /* must be allocated already */ + fqw = fifo->quadwords[qwnum - fifo->origin]; /* pull out pointer from array */ + ASSERT(fqw != NULL); /* must be allocated already */ } - else if(IS_PKE_CMD(cmd, MPG)) + + return fqw; +} + + +/* Authorize release of any FIFO entries older than given absolute quadword. */ +void +pke_fifo_old(struct pke_fifo* fifo, unsigned_4 qwnum) +{ + /* do we have any too-old FIFO elements? */ + if(fifo->origin + PKE_FIFO_ARCHEOLOGY < qwnum) { - unsigned_4* last_mpg_word; + /* count quadwords to forget */ + int horizon = qwnum - (fifo->origin + PKE_FIFO_ARCHEOLOGY); + int i; + + /* free quadwords at indices below horizon */ + for(i=0; i < horizon; i++) + zfree(fifo->quadwords[i]); + + /* move surviving quadword pointers down to beginning of array */ + for(i=horizon; i < fifo->next; i++) + fifo->quadwords[i-horizon] = fifo->quadwords[i]; + + /* clear duplicate pointers */ + for(i=fifo->next - horizon; i < fifo->next; i++) + fifo->quadwords[i] = NULL; + + /* adjust FIFO pointers */ + fifo->origin = fifo->origin + horizon; + fifo->next = fifo->next - horizon; + } +} - /* map zero to max+1 */ - if(num==0) num=0x100; - /* check that FIFO has a few more words for MPG operand */ - last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */ - if(last_mpg_word != NULL) + + +/* advance the PC by given number of data words; update STAT/FQC + field; assume FIFO is filled enough; classify passed-over words; + write FIFO trace line */ + +void +pke_pc_advance(struct pke_device* me, int num_words) +{ + int num = num_words; + struct fifo_quadword* fq = NULL; + unsigned_4 old_fifo_pc = me->fifo_pc; + + ASSERT(num_words >= 0); + + /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */ + + while(1) + { + /* find next quadword, if any */ + fq = pke_fifo_access(& me->fifo, me->fifo_pc); + + /* skip over DMA tag words if present in word 0 or 1 */ + if(fq != NULL && fq->word_class[me->qw_pc] == wc_dma) { - /* perform implied FLUSHE */ - /* read VU status word */ - unsigned_4 vu_stat; - sim_read(NULL, - (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), - (void*) & vu_stat, - sizeof(unsigned_4)); - /* XXX: check RC */ - - /* check if VBS bit is clear, i.e., VU is idle */ - if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) - { - /* VU idle */ - int i; + /* skip by going around loop an extra time */ + num ++; + } + + /* nothing left to skip / no DMA tag here */ + if(num == 0) + break; - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + /* we are supposed to skip existing words */ + ASSERT(fq != NULL); - /* transfer VU instructions, one word per iteration */ - for(i=0; ipke_number == 0) ? - VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START; - vu_addr = vu_addr_base + (imm*2) + i; - - /* VU*_MEM0_TRACK : source-addr tracking table */ - vutrack_addr_base = (me->pke_number == 0) ? - VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; - vutrack_addr = vu_addr_base + (imm*2) + i; - - /* write data into VU memory */ - pke_track_write(me, operand, sizeof(unsigned_4), - vu_addr, fq->source_address); - - /* write srcaddr into VU srcaddr tracking table */ - sim_write(NULL, - (SIM_ADDR) vutrack_addr, - (void*) & fq->source_address, - sizeof(unsigned_4)); - /* XXX: check RC */ - } /* VU xfer loop */ - - /* advance PC */ - pke_pc_advance(me, 1 + num*2); - } - else + /* one word skipped */ + num --; + + /* point to next word */ + me->qw_pc ++; + if(me->qw_pc == 4) + { + me->qw_pc = 0; + me->fifo_pc ++; + + /* trace the consumption of the FIFO quadword we just skipped over */ + /* fq still points to it */ + if(me->fifo_trace_file != NULL) { - /* VU busy */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ + /* assert complete classification */ + ASSERT(fq->word_class[3] != wc_unknown); + ASSERT(fq->word_class[2] != wc_unknown); + ASSERT(fq->word_class[1] != wc_unknown); + ASSERT(fq->word_class[0] != wc_unknown); + + /* print trace record */ + fprintf(me->fifo_trace_file, + "%d 0x%08x_%08x_%08x_%08x 0x%08x %c%c%c%c\n", + (me->pke_number == 0 ? 0 : 1), + (unsigned) fq->data[3], (unsigned) fq->data[2], + (unsigned) fq->data[1], (unsigned) fq->data[0], + (unsigned) fq->source_address, + fq->word_class[3], fq->word_class[2], + fq->word_class[1], fq->word_class[0]); } - } /* if FIFO full enough */ - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } + } /* next quadword */ + } + + /* age old entries before PC */ + if(me->fifo_pc != old_fifo_pc) + { + /* we advanced the fifo-pc; authorize disposal of anything + before previous PKEcode */ + pke_fifo_old(& me->fifo, old_fifo_pc); } - else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */ + + /* clear FQC if FIFO is now empty */ + fq = pke_fifo_access(& me->fifo, me->fifo_pc); + if(fq == NULL) + { + PKE_REG_MASK_SET(me, STAT, FQC, 0); + } + else /* annote the word where the PC lands as an PKEcode */ { - /* check that FIFO has a few more words for DIRECT operand */ - unsigned_4* last_direct_word; + ASSERT(fq->word_class[me->qw_pc] == wc_pkecode || fq->word_class[me->qw_pc] == wc_unknown); + fq->word_class[me->qw_pc] = wc_pkecode; + } +} - /* map zero to max+1 */ - if(imm==0) imm=0x10000; - last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */ - if(last_direct_word != NULL) - { - /* VU idle */ - int i; - quadword fifo_data; - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); - /* transfer GPUIF quadwords, one word per iteration */ - for(i=0; isource_address); - /* XXX: check RC */ - } /* write collected quadword */ +/* Return pointer to FIFO quadword containing given operand# in FIFO. + `operand_num' starts at 1. Return pointer to operand word in last + argument, if non-NULL. If FIFO is not full enough, return 0. + Signal an ER0 indication upon skipping a DMA tag. */ - } /* GPUIF xfer loop */ - - /* advance PC */ - pke_pc_advance(me, 1 + imm*4); - } /* if FIFO full enough */ - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } +struct fifo_quadword* +pke_pcrel_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) +{ + int num; + int new_qw_pc, new_fifo_pc; + struct fifo_quadword* fq = NULL; + + /* check for validity of last search results in cache */ + if(me->last_fifo_pc == me->fifo_pc && + me->last_qw_pc == me->qw_pc && + operand_num > me->last_num) + { + /* continue search from last stop */ + new_fifo_pc = me->last_new_fifo_pc; + new_qw_pc = me->last_new_qw_pc; + num = operand_num - me->last_num; } - else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */ + else { - short vn = BIT_MASK_GET(cmd, 2, 3); - short vl = BIT_MASK_GET(cmd, 0, 1); - short vnvl = BIT_MASK_GET(cmd, 0, 3); - int m = BIT_MASK_GET(cmd, 4, 4); - short cl = PKE_REG_MASK_GET(me, CYCLE, CL); - short wl = PKE_REG_MASK_GET(me, CYCLE, WL); - int n, num_operands; - unsigned_4* last_operand_word; - - /* map zero to max+1 */ - if(num==0) num=0x100; - - /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ - if(wl <= cl) - n = num; - else - n = cl * (num/wl) + PKE_LIMIT(num % wl, cl); - num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4)); + /* start search from scratch */ + new_fifo_pc = me->fifo_pc; + new_qw_pc = me->qw_pc; + num = operand_num; + } + + ASSERT(num > 0); + + /* printf("pke %d pcrel_fifo operand_num %d\n", me->pke_number, operand_num); */ - /* confirm that FIFO has enough words in it */ - last_operand_word = pke_pc_operand(me, num_operands); - if(last_operand_word != NULL) + do + { + /* one word skipped */ + num --; + + /* point to next word */ + new_qw_pc ++; + if(new_qw_pc == 4) { - address_word vu_addr_base; - int operand_num, vector_num; + new_qw_pc = 0; + new_fifo_pc ++; + } - /* "transferring" operand */ - PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + fq = pke_fifo_access(& me->fifo, new_fifo_pc); - /* XXX: don't check whether VU is idle?? */ + /* check for FIFO underflow */ + if(fq == NULL) + break; - if(me->pke_number == 0) - vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); - else + /* skip over DMA tag words if present in word 0 or 1 */ + if(fq->word_class[new_qw_pc] == wc_dma) + { + /* set ER0 */ + PKE_REG_MASK_SET(me, STAT, ER0, 1); + + /* mismatch error! */ + if(! PKE_REG_MASK_GET(me, ERR, ME0)) { - vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); - if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */ - vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS); + pke_begin_interrupt_stall(me); + /* don't stall just yet -- finish this instruction */ + /* the PPS_STALL state will be entered by pke_issue() next time */ } + /* skip by going around loop an extra time */ + num ++; + } + } + while(num > 0); - /* XXX: vu_addr overflow check */ + /* return pointer to operand word itself */ + if(fq != NULL) + { + *operand = & fq->data[new_qw_pc]; + + /* annote the word where the pseudo-PC lands as an PKE operand */ + ASSERT(fq->word_class[new_qw_pc] == wc_pkedata || fq->word_class[new_qw_pc] == wc_unknown); + fq->word_class[new_qw_pc] = wc_pkedata; + + /* store search results in cache */ + /* keys */ + me->last_fifo_pc = me->fifo_pc; + me->last_qw_pc = me->qw_pc; + /* values */ + me->last_num = operand_num; + me->last_new_fifo_pc = new_fifo_pc; + me->last_new_qw_pc = new_qw_pc; + } - /* transfer given number of vectors */ - operand_num = 1; /* word index into instruction stream: 1..num_operands */ - vector_num = 0; /* vector number being processed: 0..num-1 */ - while(operand_num <= num_operands) - { - quadword vu_old_data; - quadword vu_new_data; - quadword unpacked_data; - address_word vu_addr; - struct fifo_quadword* fq; - int i; + return fq; +} - /* XXX: set NUM */ - /* compute VU destination address, as bytes in R5900 memory */ - if(cl >= wl) - { - /* map zero to max+1 */ - if(wl == 0) wl = 0x0100; - vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl)); - } - else - vu_addr = vu_addr_base + 16*vector_num; +/* Return pointer to given operand# in FIFO. `operand_num' starts at 1. + If FIFO is not full enough, return 0. Skip over DMA tags, but mark + them as an error (ER0). */ - /* read old VU data word at address */ - sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data)); +unsigned_4* +pke_pcrel_operand(struct pke_device* me, int operand_num) +{ + unsigned_4* operand = NULL; + struct fifo_quadword* fifo_operand; - /* Let sourceaddr track the first operand */ - fq = pke_pc_fifo(me, operand_num); + fifo_operand = pke_pcrel_fifo(me, operand_num, & operand); - /* For cyclic unpack, next operand quadword may come from instruction stream - or be zero. */ - if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */ - { - /* clear operand - used only in a "indeterminate" state */ - for(i = 0; i < 4; i++) - unpacked_data[i] = 0; - } - else - { - /* compute unpacked words from instruction stream */ - switch(vnvl) - { - case PKE_UNPACK_S_32: - case PKE_UNPACK_V2_32: - case PKE_UNPACK_V3_32: - case PKE_UNPACK_V4_32: - /* copy (vn+1) 32-bit values */ - for(i = 0; i < vn+1; i++) - { - unsigned_4* operand = pke_pc_operand(me, operand_num); - unpacked_data[i] = *operand; - operand_num ++; - } - break; - - case PKE_UNPACK_S_16: - case PKE_UNPACK_V2_16: - case PKE_UNPACK_V3_16: - case PKE_UNPACK_V4_16: - /* copy (vn+1) 16-bit values, packed two-per-word */ - for(i=0; i vn) - masked_value = & zero; /* XXX: what to put here? */ - else - masked_value = & unpacked_data[i]; - break; - - case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ - masked_value = & me->regs[PKE_REG_R0 + i][0]; - break; - - case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ - masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0]; - break; - - case PKE_MASKREG_NOTHING: - /* "write inhibit" by re-copying old data */ - masked_value = & vu_old_data[i]; - break; - - default: - ASSERT(0); - /* no other cases possible */ - } - - /* copy masked value for column */ - memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4)); - } /* loop over columns */ - } - else - { - /* no mask - just copy over entire unpacked quadword */ - memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); - } + return operand; +} - /* process STMOD register for accumulation operations */ - switch(PKE_REG_MASK_GET(me, MODE, MDE)) - { - case PKE_MODE_ADDROW: /* add row registers to output data */ - for(i=0; i<4; i++) - /* exploit R0..R3 contiguity */ - vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; - break; - - case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */ - for(i=0; i<4; i++) - { - /* exploit R0..R3 contiguity */ - vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; - me->regs[PKE_REG_R0 + i][0] = vu_new_data[i]; - } - break; - case PKE_MODE_INPUT: /* pass data through */ - default: - ; - } +/* Return a bit-field extract of given operand# in FIFO, and its + source-addr. `bit_offset' starts at 0, referring to LSB after PKE + instruction word. Width must be >0, <=32. Assume FIFO is full + enough. Skip over DMA tags, but mark them as an error (ER0). */ - /* write replacement word */ - pke_track_write(me, vu_new_data, sizeof(vu_new_data), - (SIM_ADDR) vu_addr, fq->source_address); +unsigned_4 +pke_pcrel_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr) +{ + unsigned_4* word = NULL; + unsigned_4 value; + struct fifo_quadword* fifo_operand; + int wordnumber, bitnumber; - /* next vector please */ - vector_num ++; - } /* vector transfer loop */ - } /* PKE FIFO full enough */ - else - { - /* need to wait for another word */ - next_pps_state = PKE_REG_STAT_PPS_WAIT; - /* retry this instruction next clock */ - } + wordnumber = bit_offset/32; + bitnumber = bit_offset%32; + + /* find operand word with bitfield */ + fifo_operand = pke_pcrel_fifo(me, wordnumber + 1, &word); + ASSERT(word != NULL); + + /* extract bitfield from word */ + value = BIT_MASK_GET(*word, bitnumber, bitnumber + bit_width - 1); + + /* extract source addr from fifo word */ + *source_addr = fifo_operand->source_address; + + return value; +} + + + +/* check for stall conditions on indicated devices (path* only on + PKE1), do not change status; return 0 iff no stall */ +int +pke_check_stall(struct pke_device* me, enum pke_check_target what) +{ + int any_stall = 0; + unsigned_4 cop2_stat, gpuif_stat; + + /* read status words */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_READ(me, (GIF_REG_STAT), + & gpuif_stat, + 4); + PKE_MEM_READ(me, (COP2_REG_STAT_ADDR), + & cop2_stat, + 4); + + /* perform checks */ + if(what == chk_vu) + { + if(me->pke_number == 0) + any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS0_B, COP2_REG_STAT_VBS0_E); + else /* if(me->pke_number == 1) */ + any_stall = BIT_MASK_GET(cop2_stat, COP2_REG_STAT_VBS1_B, COP2_REG_STAT_VBS1_E); + } + else if(what == chk_path1) /* VU -> GPUIF */ + { + ASSERT(me->pke_number == 1); + if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1) + any_stall = 1; + } + else if(what == chk_path2) /* PKE -> GPUIF */ + { + ASSERT(me->pke_number == 1); + if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2) + any_stall = 1; + } + else if(what == chk_path3) /* DMA -> GPUIF */ + { + ASSERT(me->pke_number == 1); + if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3) + any_stall = 1; } - /* ... */ else { - /* set ER1 flag in STAT register */ - PKE_REG_MASK_SET(me, STAT, ER1, 1); - /* advance over faulty word */ - pke_pc_advance(me, 1); + /* invalid what */ + ASSERT(0); } - /* PKE is now idle or waiting */ - PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state); + /* any stall reasons? */ + return any_stall; +} + + +/* PKE1 only: flip the DBF bit; recompute TOPS, TOP */ +void +pke_flip_dbf(struct pke_device* me) +{ + int newdf; + /* compute new TOP */ + PKE_REG_MASK_SET(me, TOP, TOP, + PKE_REG_MASK_GET(me, TOPS, TOPS)); + /* flip DBF */ + newdf = PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1; + PKE_REG_MASK_SET(me, DBF, DF, newdf); + PKE_REG_MASK_SET(me, STAT, DBF, newdf); + /* compute new TOPS */ + PKE_REG_MASK_SET(me, TOPS, TOPS, + (PKE_REG_MASK_GET(me, BASE, BASE) + + newdf * PKE_REG_MASK_GET(me, OFST, OFFSET))); + + /* this is equivalent to last word from okadaa (98-02-25): + 1) TOP=TOPS; + 2) TOPS=BASE + !DBF*OFFSET + 3) DBF=!DBF */ +} + + +/* set the STAT:PIS bit and send an interrupt to the 5900 */ +void +pke_begin_interrupt_stall(struct pke_device* me) +{ + /* set PIS */ + PKE_REG_MASK_SET(me, STAT, PIS, 1); + + /* XXX: send interrupt to 5900? */ } +/* PKEcode handler functions -- responsible for checking and + confirming old stall conditions, executing pkecode, updating PC and + status registers -- may assume being run on correct PKE unit */ + +void +pke_code_nop(struct pke_device* me, unsigned_4 pkecode) +{ + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} -/* advance the PC by given number of words; update STAT/FQC field */ +void +pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* copy immediate value into CYCLE reg */ + PKE_REG_MASK_SET(me, CYCLE, WL, BIT_MASK_GET(imm, 8, 15)); + PKE_REG_MASK_SET(me, CYCLE, CL, BIT_MASK_GET(imm, 0, 7)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + void -pke_pc_advance(struct pke_device* me, int num_words) +pke_code_offset(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* copy 10 bits to OFFSET field */ + PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); + /* clear DBF bit */ + PKE_REG_MASK_SET(me, DBF, DF, 0); + /* clear other DBF bit */ + PKE_REG_MASK_SET(me, STAT, DBF, 0); + /* set TOPS = BASE */ + PKE_REG_MASK_SET(me, TOPS, TOPS, PKE_REG_MASK_GET(me, BASE, BASE)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + + +void +pke_code_base(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* copy 10 bits to BASE field */ + PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + + +void +pke_code_itop(struct pke_device* me, unsigned_4 pkecode) { - ASSERT(num_words > 0); + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* copy 10 bits to ITOPS field */ + PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + + +void +pke_code_stmod(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* copy 2 bits to MODE register */ + PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + + +void +pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + unsigned_4 gif_mode; + + /* set appropriate bit */ + if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0) + gif_mode = GIF_REG_STAT_M3P; + else + gif_mode = 0; + + /* write register to "read-only" register; gpuif code will look at M3P bit only */ + PKE_MEM_WRITE(me, GIF_REG_VIF_M3P, & gif_mode, 4); + + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + + +void +pke_code_pkemark(struct pke_device* me, unsigned_4 pkecode) +{ + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + /* copy 16 bits to MARK register */ + PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); + /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ + PKE_REG_MASK_SET(me, STAT, MRK, 1); + /* done */ + pke_pc_advance(me, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); +} + - me->qw_pc += num_words; - /* handle overflow */ - while(me->qw_pc >= 4) +void +pke_code_flushe(struct pke_device* me, unsigned_4 pkecode) +{ + /* compute next PEW bit */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PEW, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ + } + else { - me->qw_pc -= 4; - me->fifo_pc ++; + /* VU idle */ + PKE_REG_MASK_SET(me, STAT, PEW, 0); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); } +} - /* clear FQC if FIFO is now empty */ - if(me->fifo_num_elements == me->fifo_pc) + +void +pke_code_flush(struct pke_device* me, unsigned_4 pkecode) +{ + int something_busy = 0; + + /* compute next PEW, PGW bits */ + if(pke_check_stall(me, chk_vu)) { - PKE_REG_MASK_SET(me, STAT, FQC, 0); + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PEW, 1); + } + else + PKE_REG_MASK_SET(me, STAT, PEW, 0); + + + if(pke_check_stall(me, chk_path1) || + pke_check_stall(me, chk_path2)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PGW, 1); } + else + PKE_REG_MASK_SET(me, STAT, PGW, 0); + /* go or no go */ + if(something_busy) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } + else + { + /* all idle */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } } +void +pke_code_flusha(struct pke_device* me, unsigned_4 pkecode) +{ + int something_busy = 0; -/* Return pointer to given operand# in FIFO. `word_num' starts at 1. - If FIFO is not full enough, return 0. */ + /* compute next PEW, PGW bits */ + if(pke_check_stall(me, chk_vu)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PEW, 1); + } + else + PKE_REG_MASK_SET(me, STAT, PEW, 0); -unsigned_4* -pke_pc_operand(struct pke_device* me, int word_num) + + if(pke_check_stall(me, chk_path1) || + pke_check_stall(me, chk_path2) || + pke_check_stall(me, chk_path3)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PGW, 1); + } + else + PKE_REG_MASK_SET(me, STAT, PGW, 0); + + if(something_busy) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } + else + { + /* all idle */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } +} + + +void +pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode) { - int new_qw_pc, new_fifo_pc; - unsigned_4* operand; + /* compute next PEW bit */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PEW, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ + } + else + { + unsigned_4 vu_pc; + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* VU idle */ + PKE_REG_MASK_SET(me, STAT, PEW, 0); + + /* flip DBF on PKE1 */ + if(me->pke_number == 1) + pke_flip_dbf(me); + + /* compute new PC for VU (host byte-order) */ + vu_pc = BIT_MASK_GET(imm, 0, 15); + vu_pc = T2H_4(vu_pc); - ASSERT(word_num > 0); + /* write new PC; callback function gets VU running */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), + & vu_pc, + 4); - new_fifo_pc = me->fifo_pc; - new_qw_pc += me->qw_pc + word_num; + /* copy ITOPS field to ITOP */ + PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); - /* handle overflow */ - while(new_qw_pc >= 4) + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } +} + + + +void +pke_code_pkemscnt(struct pke_device* me, unsigned_4 pkecode) +{ + /* compute next PEW bit */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PEW, 1); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* try again next cycle */ + } + else + { + unsigned_4 vu_pc; + + /* VU idle */ + PKE_REG_MASK_SET(me, STAT, PEW, 0); + + /* flip DBF on PKE1 */ + if(me->pke_number == 1) + pke_flip_dbf(me); + + /* read old PC */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_READ(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), + & vu_pc, + 4); + + /* rewrite new PC; callback function gets VU running */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), + & vu_pc, + 4); + + /* copy ITOPS field to ITOP */ + PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } +} + + +void +pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode) +{ + int something_busy = 0; + + /* compute next PEW, PGW bits */ + if(pke_check_stall(me, chk_vu)) { - new_qw_pc -= 4; - new_fifo_pc ++; + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PEW, 1); } + else + PKE_REG_MASK_SET(me, STAT, PEW, 0); + - /* not enough elements */ - if(me->fifo_num_elements == me->fifo_pc) - operand = NULL; + if(pke_check_stall(me, chk_path1) || + pke_check_stall(me, chk_path2) || + pke_check_stall(me, chk_path3)) + { + something_busy = 1; + PKE_REG_MASK_SET(me, STAT, PGW, 1); + } else - operand = & me->fifo[new_fifo_pc].data[new_qw_pc]; + PKE_REG_MASK_SET(me, STAT, PGW, 0); - return operand; + /* go or no go */ + if(something_busy) + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } + else + { + unsigned_4 vu_pc; + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* flip DBF on PKE1 */ + if(me->pke_number == 1) + pke_flip_dbf(me); + + /* compute new PC for VU (host byte-order) */ + vu_pc = BIT_MASK_GET(imm, 0, 15); + vu_pc = T2H_4(vu_pc); + + /* rewrite new PC; callback function gets VU running */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_WRITE(me, (me->pke_number == 0 ? VU0_CIA : VU1_CIA), + & vu_pc, + 4); + + /* copy ITOPS field to ITOP */ + PKE_REG_MASK_SET(me, ITOP, ITOP, PKE_REG_MASK_GET(me, ITOPS, ITOPS)); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1); + } } +void +pke_code_stmask(struct pke_device* me, unsigned_4 pkecode) +{ + unsigned_4* mask; -/* Return pointer to FIFO quadword containing given operand# in FIFO. - `word_num' starts at 1. If FIFO is not full enough, return 0. */ + /* check that FIFO has one more word for STMASK operand */ + mask = pke_pcrel_operand(me, 1); + if(mask != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); -struct fifo_quadword* -pke_pc_fifo(struct pke_device* me, int word_num) + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, 1); + + /* fill the register */ + PKE_REG_MASK_SET(me, MASK, MASK, *mask); + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, 0); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 2); + } + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } +} + + +void +pke_code_strow(struct pke_device* me, unsigned_4 pkecode) { - int new_qw_pc, new_fifo_pc; - struct fifo_quadword* operand; + /* check that FIFO has four more words for STROW operand */ + unsigned_4* last_op; + + last_op = pke_pcrel_operand(me, 4); + if(last_op != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, 1); + + /* copy ROW registers: must all exist if 4th operand exists */ + me->regs[PKE_REG_R0][0] = * pke_pcrel_operand(me, 1); + me->regs[PKE_REG_R1][0] = * pke_pcrel_operand(me, 2); + me->regs[PKE_REG_R2][0] = * pke_pcrel_operand(me, 3); + me->regs[PKE_REG_R3][0] = * pke_pcrel_operand(me, 4); + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, 0); - ASSERT(word_num > 0); + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 5); + } + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } +} - new_fifo_pc = me->fifo_pc; - new_qw_pc += me->qw_pc + word_num; - /* handle overflow */ - while(new_qw_pc >= 4) +void +pke_code_stcol(struct pke_device* me, unsigned_4 pkecode) +{ + /* check that FIFO has four more words for STCOL operand */ + unsigned_4* last_op; + + last_op = pke_pcrel_operand(me, 4); + if(last_op != NULL) { - new_qw_pc -= 4; - new_fifo_pc ++; + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, 1); + + /* copy COL registers: must all exist if 4th operand exists */ + me->regs[PKE_REG_C0][0] = * pke_pcrel_operand(me, 1); + me->regs[PKE_REG_C1][0] = * pke_pcrel_operand(me, 2); + me->regs[PKE_REG_C2][0] = * pke_pcrel_operand(me, 3); + me->regs[PKE_REG_C3][0] = * pke_pcrel_operand(me, 4); + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, 0); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 5); } + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* try again next cycle */ + } +} + + +void +pke_code_mpg(struct pke_device* me, unsigned_4 pkecode) +{ + unsigned_4* last_mpg_word; + int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* assert 64-bit alignment of MPG operand */ + if(me->qw_pc != 3 && me->qw_pc != 1) + return pke_code_error(me, pkecode); + + /* map zero to max+1 */ + if(num==0) num=0x100; + + /* check that FIFO has a few more words for MPG operand */ + last_mpg_word = pke_pcrel_operand(me, num*2); /* num: number of 64-bit words */ + if(last_mpg_word != NULL) + { + /* perform implied FLUSHE */ + if(pke_check_stall(me, chk_vu)) + { + /* VU busy */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + /* retry this instruction next clock */ + } + else + { + /* VU idle */ + int i; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, num); - /* not enough elements */ - if(me->fifo_num_elements == me->fifo_pc) - operand = NULL; + /* transfer VU instructions, one word-pair per iteration */ + for(i=0; ipke_number == 0) ? + VU0_MEM0_WINDOW_START : VU1_MEM0_WINDOW_START; + vu_addr_max_size = (me->pke_number == 0) ? + VU0_MEM0_SIZE : VU1_MEM0_SIZE; + vutrack_addr_base = (me->pke_number == 0) ? + VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; + + /* compute VU address for this word-pair */ + vu_addr = vu_addr_base + (imm + i) * 8; + /* check for vu_addr overflow */ + while(vu_addr >= vu_addr_base + vu_addr_max_size) + vu_addr -= vu_addr_max_size; + + /* compute VU tracking address */ + vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2; + + /* Fetch operand words; assume they are already little-endian for VU imem */ + fq = pke_pcrel_fifo(me, i*2 + 1, & operand); + vu_lower_opcode = *operand; + vu_upper_opcode = *pke_pcrel_operand(me, i*2 + 2); + + /* write data into VU memory */ + /* lower (scalar) opcode comes in first word ; macro performs H2T! */ + PKE_MEM_WRITE(me, vu_addr, + & vu_lower_opcode, + 4); + /* upper (vector) opcode comes in second word ; H2T */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_WRITE(me, vu_addr + 4, + & vu_upper_opcode, + 4); + + /* write tracking address in target byte-order */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_WRITE(me, vutrack_addr, + & fq->source_address, + 4); + } /* VU xfer loop */ + + /* check NUM */ + ASSERT(PKE_REG_MASK_GET(me, NUM, NUM) == 0); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1 + num*2); + } + } /* if FIFO full enough */ else - operand = & me->fifo[new_fifo_pc]; + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* retry this instruction next clock */ + } +} - return operand; + +void +pke_code_direct(struct pke_device* me, unsigned_4 pkecode) +{ + /* check that FIFO has a few more words for DIRECT operand */ + unsigned_4* last_direct_word; + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + /* assert 128-bit alignment of DIRECT operand */ + if(me->qw_pc != 3) + return pke_code_error(me, pkecode); + + /* map zero to max+1 */ + if(imm==0) imm=0x10000; + + last_direct_word = pke_pcrel_operand(me, imm*4); /* imm: number of 128-bit words */ + if(last_direct_word != NULL) + { + /* VU idle */ + int i; + unsigned_16 fifo_data; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* transfer GPUIF quadwords, one word per iteration */ + for(i=0; ipke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, - (void*) & sourceaddr, - sizeof(unsigned_4)); + int imm = BIT_MASK_GET(pkecode, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + int cmd = BIT_MASK_GET(pkecode, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); + int num = BIT_MASK_GET(pkecode, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); + int nummx = (num == 0) ? 0x0100 : num; + short vn = BIT_MASK_GET(cmd, 2, 3); /* unpack shape controls */ + short vl = BIT_MASK_GET(cmd, 0, 1); + int m = BIT_MASK_GET(cmd, 4, 4); + short cl = PKE_REG_MASK_GET(me, CYCLE, CL); /* cycle controls */ + short wl = PKE_REG_MASK_GET(me, CYCLE, WL); + short addrwl = (wl == 0) ? 0x0100 : wl; + int r = BIT_MASK_GET(imm, 15, 15); /* indicator bits in imm value */ + int usn = BIT_MASK_GET(imm, 14, 14); + + int n, num_operands; + unsigned_4* last_operand_word = NULL; + + /* catch all illegal UNPACK variants */ + if(vl == 3 && vn < 3) + { + pke_code_error(me, pkecode); + return; + } - /* write bytes into simulator */ - rc = sim_write(NULL, - (SIM_ADDR) dest, - (void*) src, - len); + /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ + if(cl >= addrwl) + n = num; + else + n = cl * (nummx / addrwl) + PKE_LIMIT(nummx % addrwl, cl); + num_operands = (31 + (32 >> vl) * (vn+1) * n)/32; /* round up to next word */ - /* clear srcaddr from PKE srcaddr tracking */ - sim_write(NULL, - (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, - (void*) & no_sourceaddr, - sizeof(unsigned_4)); + /* confirm that FIFO has enough words in it */ + if(num_operands > 0) + last_operand_word = pke_pcrel_operand(me, num_operands); + if(last_operand_word != NULL || num_operands == 0) + { + address_word vu_addr_base, vutrack_addr_base; + address_word vu_addr_max_size; + int vector_num_out, vector_num_in; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* don't check whether VU is idle */ + + /* compute VU address base */ + if(me->pke_number == 0) + { + vu_addr_base = VU0_MEM1_WINDOW_START; + vu_addr_max_size = VU0_MEM1_SIZE; + vutrack_addr_base = VU0_MEM1_SRCADDR_START; + r = 0; + } + else + { + vu_addr_base = VU1_MEM1_WINDOW_START; + vu_addr_max_size = VU1_MEM1_SIZE; + vutrack_addr_base = VU1_MEM1_SRCADDR_START; + } + + /* set NUM */ + PKE_REG_MASK_SET(me, NUM, NUM, nummx); + + /* transfer given number of vectors */ + vector_num_out = 0; /* output vector number being processed */ + vector_num_in = 0; /* argument vector number being processed */ + do + { + quadword vu_old_data; + quadword vu_new_data; + quadword unpacked_data; + address_word vu_addr; + address_word vutrack_addr; + unsigned_4 source_addr = 0; + int i; + int next_num; + + /* decrement NUM */ + next_num = PKE_REG_MASK_GET(me, NUM, NUM) - 1; + PKE_REG_MASK_SET(me, NUM, NUM, next_num); + + /* compute VU destination address, as bytes in R5900 memory */ + if(cl >= wl) + { + /* map zero to max+1 */ + vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + + (vector_num_out / addrwl) * cl + + (vector_num_out % addrwl)); + } + else + vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + + vector_num_out); + + /* handle "R" double-buffering bit */ + if(r) + vu_addr += 16 * PKE_REG_MASK_GET(me, TOPS, TOPS); + + /* check for vu_addr overflow */ + while(vu_addr >= vu_addr_base + vu_addr_max_size) + vu_addr -= vu_addr_max_size; + + /* compute address of tracking table entry */ + vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4; + + /* read old VU data word at address; reverse words if needed */ + { + unsigned_16 vu_old_badwords; + ASSERT(sizeof(vu_old_badwords) == 16); + PKE_MEM_READ(me, vu_addr, + &vu_old_badwords, 16); + vu_old_data[0] = * A4_16(& vu_old_badwords, 3); + vu_old_data[1] = * A4_16(& vu_old_badwords, 2); + vu_old_data[2] = * A4_16(& vu_old_badwords, 1); + vu_old_data[3] = * A4_16(& vu_old_badwords, 0); + } + + /* For cyclic unpack, next operand quadword may come from instruction stream + or be zero. */ + if((cl < addrwl) && + (vector_num_out % addrwl) >= cl) + { + /* clear operand - used only in a "indeterminate" state */ + for(i = 0; i < 4; i++) + unpacked_data[i] = 0; + } + else + { + /* compute packed vector dimensions */ + int vectorbits = 0, unitbits = 0; + + if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */ + { + unitbits = (32 >> vl); + vectorbits = unitbits * (vn+1); + } + else if(vl == 3 && vn == 3) /* PKE_UNPACK_V4_5 */ + { + unitbits = 5; + vectorbits = 16; + } + else /* illegal unpack variant */ + { + /* should have been caught at top of function */ + ASSERT(0); + } + + /* loop over columns */ + for(i=0; i<=vn; i++) + { + unsigned_4 operand; + + /* offset in bits in current operand word */ + int bitoffset = + (vector_num_in * vectorbits) + (i * unitbits); /* # of bits from PKEcode */ + + /* last unit of V4_5 is only one bit wide */ + if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */ + unitbits = 1; + + /* confirm we're not reading more than we said we needed */ + if(vector_num_in * vectorbits >= num_operands * 32) + { + /* this condition may be triggered by illegal + PKEcode / CYCLE combinations. */ + pke_code_error(me, pkecode); + /* XXX: this case needs to be better understood, + and detected at a better time. */ + return; + } + + /* fetch bitfield operand */ + operand = pke_pcrel_operand_bits(me, bitoffset, unitbits, & source_addr); + + /* selectively sign-extend; not for V4_5 1-bit value */ + if(usn || unitbits == 1) + unpacked_data[i] = operand; + else + unpacked_data[i] = SEXT32(operand, unitbits-1); + } + + /* set remaining top words in vector */ + for(i=vn+1; i<4; i++) + { + if(vn == 0) /* S_{32,16,8}: copy lowest element */ + unpacked_data[i] = unpacked_data[0]; + else + unpacked_data[i] = 0; + } + + /* consumed a vector from the PKE instruction stream */ + vector_num_in ++; + } /* unpack word from instruction operand */ + + /* process STMOD register for accumulation operations */ + switch(PKE_REG_MASK_GET(me, MODE, MDE)) + { + case PKE_MODE_ADDROW: /* add row registers to output data */ + case PKE_MODE_ACCROW: /* same .. later conditionally accumulate */ + for(i=0; i<4; i++) + /* exploit R0..R3 contiguity */ + unpacked_data[i] += me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MODE_INPUT: /* pass data through */ + default: /* specified as undefined */ + ; + } + + /* compute replacement word */ + if(m) /* use mask register? */ + { + /* compute index into mask register for this word */ + int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3); + + for(i=0; i<4; i++) /* loop over columns */ + { + int mask_op = PKE_MASKREG_GET(me, mask_index, i); + unsigned_4* masked_value = NULL; + + switch(mask_op) + { + case PKE_MASKREG_INPUT: + masked_value = & unpacked_data[i]; + + /* conditionally accumulate */ + if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) + me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; + + break; + + case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ + masked_value = & me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ + masked_value = & me->regs[PKE_REG_C0 + mask_index][0]; + break; + + case PKE_MASKREG_NOTHING: + /* "write inhibit" by re-copying old data */ + masked_value = & vu_old_data[i]; + break; + + default: + ASSERT(0); + /* no other cases possible */ + } + + /* copy masked value for column */ + vu_new_data[i] = *masked_value; + } /* loop over columns */ + } /* mask */ + else + { + /* no mask - just copy over entire unpacked quadword */ + memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); + + /* conditionally store accumulated row results */ + if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) + for(i=0; i<4; i++) + me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; + } + + /* write new VU data word at address; reverse words if needed */ + { + unsigned_16 vu_new_badwords; + * A4_16(& vu_new_badwords, 3) = vu_new_data[0]; + * A4_16(& vu_new_badwords, 2) = vu_new_data[1]; + * A4_16(& vu_new_badwords, 1) = vu_new_data[2]; + * A4_16(& vu_new_badwords, 0) = vu_new_data[3]; + ASSERT(sizeof(vu_new_badwords) == 16); + PKE_MEM_WRITE(me, vu_addr, + &vu_new_badwords, 16); + } + + /* write tracking address */ + ASSERT(sizeof(unsigned_4) == 4); + PKE_MEM_WRITE(me, vutrack_addr, + & source_addr, + 4); + + /* next vector please */ + vector_num_out ++; + } /* vector transfer loop */ + while(PKE_REG_MASK_GET(me, NUM, NUM) > 0); + + /* confirm we've written as many vectors as told */ + ASSERT(nummx == vector_num_out); + + /* done */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + pke_pc_advance(me, 1 + num_operands); + } /* PKE FIFO full enough */ + else + { + /* need to wait for another word */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_WAIT); + /* retry this instruction next clock */ + } +} + + +void +pke_code_error(struct pke_device* me, unsigned_4 pkecode) +{ + /* set ER1 flag in STAT register */ + PKE_REG_MASK_SET(me, STAT, ER1, 1); + + if(! PKE_REG_MASK_GET(me, ERR, ME1)) + { + pke_begin_interrupt_stall(me); + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); + } + else + { + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); + } - return rc; + /* advance over faulty word */ + pke_pc_advance(me, 1); }