X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=sim%2Fmips%2Fsky-pke.c;h=6e318750eaab4a3dd451b946204bef9178341142;hb=c0a4c3ba170e91bf93d16e0a6340980f6d62901a;hp=011db64c15c619bf18599322d278c8a2a8329b24;hpb=fd90908986929e283cca54c3675f7131fa5b16ab;p=deliverable%2Fbinutils-gdb.git diff --git a/sim/mips/sky-pke.c b/sim/mips/sky-pke.c index 011db64c15..6e318750ea 100644 --- a/sim/mips/sky-pke.c +++ b/sim/mips/sky-pke.c @@ -1,38 +1,47 @@ /* Copyright (C) 1998, Cygnus Solutions */ -/* Debugguing PKE? */ -#define PKE_DEBUG - +#include "config.h" #include -#include "sky-pke.h" -#include "sky-dma.h" +#include "sim-main.h" #include "sim-bits.h" #include "sim-assert.h" -#include "sky-vu0.h" -#include "sky-vu1.h" +#include "sky-pke.h" +#include "sky-dma.h" +#include "sky-vu.h" #include "sky-gpuif.h" #include "sky-device.h" +#ifdef HAVE_STRING_H +#include +#else +#ifdef HAVE_STRINGS_H +#include +#endif +#endif + + /* Internal function declarations */ static int pke_io_read_buffer(device*, void*, int, address_word, unsigned, sim_cpu*, sim_cia); static int pke_io_write_buffer(device*, const void*, int, address_word, unsigned, sim_cpu*, sim_cia); +static void pke_reset(struct pke_device*); static void pke_issue(SIM_DESC, struct pke_device*); static void pke_pc_advance(struct pke_device*, int num_words); -static unsigned_4* pke_pc_operand(struct pke_device*, int operand_num); -static unsigned_4 pke_pc_operand_bits(struct pke_device*, int bit_offset, - int bit_width, unsigned_4* sourceaddr); -static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int operand_num, - unsigned_4** operand); +static struct fifo_quadword* pke_pcrel_fifo(struct pke_device*, int operand_num, + unsigned_4** operand); +static unsigned_4* pke_pcrel_operand(struct pke_device*, int operand_num); +static unsigned_4 pke_pcrel_operand_bits(struct pke_device*, int bit_offset, + int bit_width, unsigned_4* sourceaddr); static void pke_attach(SIM_DESC sd, struct pke_device* me); enum pke_check_target { chk_vu, chk_path1, chk_path2, chk_path3 }; static int pke_check_stall(struct pke_device* me, enum pke_check_target what); static void pke_flip_dbf(struct pke_device* me); +static void pke_begin_interrupt_stall(struct pke_device* me); /* PKEcode handlers */ static void pke_code_nop(struct pke_device* me, unsigned_4 pkecode); static void pke_code_stcycl(struct pke_device* me, unsigned_4 pkecode); @@ -56,6 +65,11 @@ static void pke_code_direct(struct pke_device* me, unsigned_4 pkecode); static void pke_code_directhl(struct pke_device* me, unsigned_4 pkecode); static void pke_code_unpack(struct pke_device* me, unsigned_4 pkecode); static void pke_code_error(struct pke_device* me, unsigned_4 pkecode); +unsigned_4 pke_fifo_flush(struct pke_fifo*); +void pke_fifo_reset(struct pke_fifo*); +struct fifo_quadword* pke_fifo_fit(struct pke_fifo*); +struct fifo_quadword* pke_fifo_access(struct pke_fifo*, unsigned_4 qwnum); +void pke_fifo_old(struct pke_fifo*, unsigned_4 qwnum); @@ -67,7 +81,9 @@ struct pke_device pke0_device = 0, 0, /* ID, flags */ {}, /* regs */ {}, 0, /* FIFO write buffer */ - NULL, 0, 0, NULL, /* FIFO */ + { NULL, 0, 0, 0 }, /* FIFO */ + NULL, /* FIFO trace file */ + -1, -1, 0, 0, 0, /* invalid FIFO cache */ 0, 0 /* pc */ }; @@ -78,7 +94,9 @@ struct pke_device pke1_device = 1, 0, /* ID, flags */ {}, /* regs */ {}, 0, /* FIFO write buffer */ - NULL, 0, 0, NULL, /* FIFO */ + { NULL, 0, 0, 0 }, /* FIFO */ + NULL, /* FIFO trace file */ + -1, -1, 0, 0, 0, /* invalid FIFO cache */ 0, 0 /* pc */ }; @@ -93,12 +111,14 @@ void pke0_attach(SIM_DESC sd) { pke_attach(sd, & pke0_device); + pke_reset(& pke0_device); } void pke1_attach(SIM_DESC sd) { pke_attach(sd, & pke1_device); + pke_reset(& pke1_device); } @@ -178,6 +198,70 @@ pke_attach(SIM_DESC sd, struct pke_device* me) } +/* Read PKE Pseudo-PC into buf in target order */ +int +read_pke_pc (struct pke_device *me, void *buf) +{ + *((int *) buf) = H2T_4( (me->fifo_pc << 2) | me->qw_pc ); + return 4; +} + + +/* Read PKE reg into buf in target order */ +int +read_pke_reg (struct pke_device *me, int reg_num, void *buf) +{ + /* handle reads to individual registers; clear `readable' on error */ + switch (reg_num) + { + /* handle common case of register reading, side-effect free */ + /* PKE1-only registers*/ + case PKE_REG_BASE: + case PKE_REG_OFST: + case PKE_REG_TOPS: + case PKE_REG_TOP: + case PKE_REG_DBF: + if (me->pke_number == 0) + { + *((int *) buf) = 0; + break; + } + /* fall through */ + + /* PKE0 & PKE1 common registers*/ + case PKE_REG_STAT: + case PKE_REG_ERR: + case PKE_REG_MARK: + case PKE_REG_CYCLE: + case PKE_REG_MODE: + case PKE_REG_NUM: + case PKE_REG_MASK: + case PKE_REG_CODE: + case PKE_REG_ITOPS: + case PKE_REG_ITOP: + case PKE_REG_R0: + case PKE_REG_R1: + case PKE_REG_R2: + case PKE_REG_R3: + case PKE_REG_C0: + case PKE_REG_C1: + case PKE_REG_C2: + case PKE_REG_C3: + *((int *) buf) = H2T_4(me->regs[reg_num][0]); + break; + + /* handle common case of write-only registers */ + case PKE_REG_FBRST: + *((int *) buf) = 0; + break; + + default: + ASSERT(0); /* tests above should prevent this possibility */ + } + + return 4; +} + /* Handle a PKE read; return no. of bytes read */ @@ -211,68 +295,15 @@ pke_io_read_buffer(device *me_, /* register bank */ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ - int readable = 1; quadword result; /* clear result */ result[0] = result[1] = result[2] = result[3] = 0; - /* handle reads to individual registers; clear `readable' on error */ - switch(reg_num) - { - /* handle common case of register reading, side-effect free */ - /* PKE1-only registers*/ - case PKE_REG_BASE: - case PKE_REG_OFST: - case PKE_REG_TOPS: - case PKE_REG_TOP: - case PKE_REG_DBF: - if(me->pke_number == 0) - readable = 0; - /* fall through */ - /* PKE0 & PKE1 common registers*/ - case PKE_REG_STAT: - case PKE_REG_ERR: - case PKE_REG_MARK: - case PKE_REG_CYCLE: - case PKE_REG_MODE: - case PKE_REG_NUM: - case PKE_REG_MASK: - case PKE_REG_CODE: - case PKE_REG_ITOPS: - case PKE_REG_ITOP: - case PKE_REG_R0: - case PKE_REG_R1: - case PKE_REG_R2: - case PKE_REG_R3: - case PKE_REG_C0: - case PKE_REG_C1: - case PKE_REG_C2: - case PKE_REG_C3: - result[0] = H2T_4(me->regs[reg_num][0]); - break; - - /* handle common case of write-only registers */ - case PKE_REG_FBRST: - readable = 0; - break; - - default: - ASSERT(0); /* test above should prevent this possibility */ - } + read_pke_reg (me, reg_num, result); /* perform transfer & return */ - if(readable) - { - /* copy the bits */ - memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); - /* okay */ - } - else - { - /* return zero bits */ - memset(dest, 0, nr_bytes); - } + memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); return nr_bytes; /* NOTREACHED */ @@ -291,8 +322,104 @@ pke_io_read_buffer(device *me_, return 0; } +/* Write PKE reg from buf, which is in target order */ +int +write_pke_reg (struct pke_device *me, int reg_num, const void *buf) +{ + int writeable = 1; + /* make words host-endian */ + unsigned_4 input = T2H_4( *((unsigned_4 *) buf) ); + + /* handle writes to individual registers; clear `writeable' on error */ + switch (reg_num) + { + case PKE_REG_FBRST: + /* Order these tests from least to most overriding, in case + multiple bits are set. */ + if(BIT_MASK_GET(input, PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E)) + { + /* clear a bunch of status bits */ + PKE_REG_MASK_SET(me, STAT, PSS, 0); + PKE_REG_MASK_SET(me, STAT, PFS, 0); + PKE_REG_MASK_SET(me, STAT, PIS, 0); + PKE_REG_MASK_SET(me, STAT, INT, 0); + PKE_REG_MASK_SET(me, STAT, ER0, 0); + PKE_REG_MASK_SET(me, STAT, ER1, 0); + me->flags &= ~PKE_FLAG_PENDING_PSS; + /* will allow resumption of possible stalled instruction */ + } + if(BIT_MASK_GET(input, PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E)) + { + me->flags |= PKE_FLAG_PENDING_PSS; + } + if(BIT_MASK_GET(input, PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E)) + { + PKE_REG_MASK_SET(me, STAT, PFS, 1); + } + if(BIT_MASK_GET(input, PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E)) + { + pke_reset(me); + } + break; + + case PKE_REG_ERR: + /* copy bottom three bits */ + BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input, 0, 2)); + break; + + case PKE_REG_MARK: + /* copy bottom sixteen bits */ + PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input, 0, 15)); + /* reset MRK bit in STAT */ + PKE_REG_MASK_SET(me, STAT, MRK, 0); + break; + + /* handle common case of read-only registers */ + /* PKE1-only registers - not really necessary to handle separately */ + case PKE_REG_BASE: + case PKE_REG_OFST: + case PKE_REG_TOPS: + case PKE_REG_TOP: + case PKE_REG_DBF: + if(me->pke_number == 0) + writeable = 0; + /* fall through */ + /* PKE0 & PKE1 common registers*/ + case PKE_REG_STAT: + /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ + case PKE_REG_CYCLE: + case PKE_REG_MODE: + case PKE_REG_NUM: + case PKE_REG_MASK: + case PKE_REG_CODE: + case PKE_REG_ITOPS: + case PKE_REG_ITOP: + case PKE_REG_R0: + case PKE_REG_R1: + case PKE_REG_R2: + case PKE_REG_R3: + case PKE_REG_C0: + case PKE_REG_C1: + case PKE_REG_C2: + case PKE_REG_C3: + writeable = 0; + break; + + default: + ASSERT(0); /* test above should prevent this possibility */ + } + + /* perform return */ + if(! writeable) + { + return 0; /* error */ + } + + return 4; +} + -/* Handle a PKE read; return no. of bytes written */ +/* Handle a PKE write; return no. of bytes written */ int pke_io_write_buffer(device *me_, @@ -324,7 +451,6 @@ pke_io_write_buffer(device *me_, /* register bank */ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ - int writeable = 1; quadword input; /* clear input */ @@ -333,103 +459,7 @@ pke_io_write_buffer(device *me_, /* write user-given bytes into input */ memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); - /* make words host-endian */ - input[0] = T2H_4(input[0]); - /* we may ignore other words */ - - /* handle writes to individual registers; clear `writeable' on error */ - switch(reg_num) - { - case PKE_REG_FBRST: - /* Order these tests from least to most overriding, in case - multiple bits are set. */ - if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STC_B, PKE_REG_FBRST_STC_E)) - { - /* clear a bunch of status bits */ - PKE_REG_MASK_SET(me, STAT, PSS, 0); - PKE_REG_MASK_SET(me, STAT, PFS, 0); - PKE_REG_MASK_SET(me, STAT, PIS, 0); - PKE_REG_MASK_SET(me, STAT, INT, 0); - PKE_REG_MASK_SET(me, STAT, ER0, 0); - PKE_REG_MASK_SET(me, STAT, ER1, 0); - me->flags &= ~PKE_FLAG_PENDING_PSS; - /* will allow resumption of possible stalled instruction */ - } - if(BIT_MASK_GET(input[0], PKE_REG_FBRST_STP_B, PKE_REG_FBRST_STP_E)) - { - me->flags |= PKE_FLAG_PENDING_PSS; - } - if(BIT_MASK_GET(input[0], PKE_REG_FBRST_FBK_B, PKE_REG_FBRST_FBK_E)) - { - PKE_REG_MASK_SET(me, STAT, PFS, 1); - } - if(BIT_MASK_GET(input[0], PKE_REG_FBRST_RST_B, PKE_REG_FBRST_RST_E)) - { - /* clear FIFO by skipping to word after PC: also - prevents re-execution attempt of possible stalled - instruction */ - me->fifo_num_elements = me->fifo_pc; - /* clear registers, flag, other state */ - memset(me->regs, 0, sizeof(me->regs)); - me->fifo_qw_done = 0; - me->flags = 0; - me->qw_pc = 0; - } - break; - - case PKE_REG_ERR: - /* copy bottom three bits */ - BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2)); - break; - - case PKE_REG_MARK: - /* copy bottom sixteen bits */ - PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15)); - /* reset MRK bit in STAT */ - PKE_REG_MASK_SET(me, STAT, MRK, 0); - break; - - /* handle common case of read-only registers */ - /* PKE1-only registers - not really necessary to handle separately */ - case PKE_REG_BASE: - case PKE_REG_OFST: - case PKE_REG_TOPS: - case PKE_REG_TOP: - case PKE_REG_DBF: - if(me->pke_number == 0) - writeable = 0; - /* fall through */ - /* PKE0 & PKE1 common registers*/ - case PKE_REG_STAT: - /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ - case PKE_REG_CYCLE: - case PKE_REG_MODE: - case PKE_REG_NUM: - case PKE_REG_MASK: - case PKE_REG_CODE: - case PKE_REG_ITOPS: - case PKE_REG_ITOP: - case PKE_REG_R0: - case PKE_REG_R1: - case PKE_REG_R2: - case PKE_REG_R3: - case PKE_REG_C0: - case PKE_REG_C1: - case PKE_REG_C2: - case PKE_REG_C3: - writeable = 0; - break; - - default: - ASSERT(0); /* test above should prevent this possibility */ - } - - /* perform return */ - if(! writeable) - { - ; /* error */ - } - + write_pke_reg (me, reg_num, input); return nr_bytes; /* NOTREACHED */ @@ -457,32 +487,19 @@ pke_io_write_buffer(device *me_, /* all done - process quadword after clearing flag */ BIT_MASK_SET(me->fifo_qw_done, 0, sizeof(quadword)-1, 0); - /* ensure FIFO has enough elements */ - if(me->fifo_num_elements == me->fifo_buffer_size) - { - /* time to grow */ - int new_fifo_buffer_size = me->fifo_buffer_size + 20; - void* ptr = realloc((void*) me->fifo, new_fifo_buffer_size*sizeof(struct fifo_quadword)); - - if(ptr == NULL) - { - /* oops, cannot enlarge FIFO any more */ - device_error(me_, "Cannot enlarge FIFO buffer\n"); - return 0; - } - - me->fifo = ptr; - me->fifo_buffer_size = new_fifo_buffer_size; - } + /* allocate required address in FIFO */ + fqw = pke_fifo_fit(& me->fifo); + ASSERT(fqw != NULL); - /* add new quadword at end of FIFO; store data in host-endian */ - fqw = & me->fifo[me->fifo_num_elements]; + /* fill in unclassified FIFO quadword data in host byte order */ fqw->word_class[0] = fqw->word_class[1] = fqw->word_class[2] = fqw->word_class[3] = wc_unknown; fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]); fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]); fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]); fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]); + + /* read DMAC-supplied indicators */ ASSERT(sizeof(unsigned_4) == 4); PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR), & fqw->source_address, /* converted to host-endian */ @@ -497,8 +514,6 @@ pke_io_write_buffer(device *me_, fqw->word_class[0] = fqw->word_class[1] = wc_dma; } - me->fifo_num_elements++; - /* set FQC to "1" as FIFO is now not empty */ PKE_REG_MASK_SET(me, STAT, FQC, 1); @@ -512,6 +527,21 @@ pke_io_write_buffer(device *me_, +/* Reset the PKE */ +void +pke_reset(struct pke_device* me) +{ + /* advance PC over last quadword in FIFO; keep previous FIFO history */ + me->fifo_pc = pke_fifo_flush(& me->fifo); + me->qw_pc = 0; + /* clear registers, flag, other state */ + memset(me->regs, 0, sizeof(me->regs)); + me->fifo_qw_done = 0; + me->flags = 0; +} + + + /* Issue & swallow next PKE opcode if possible/available */ void @@ -519,20 +549,21 @@ pke_issue(SIM_DESC sd, struct pke_device* me) { struct fifo_quadword* fqw; unsigned_4 fw; - unsigned_4 cmd, intr, num; - unsigned_4 imm; + unsigned_4 cmd, intr; /* 1 -- fetch PKE instruction */ /* confirm availability of new quadword of PKE instructions */ - if(me->fifo_num_elements <= me->fifo_pc) + fqw = pke_fifo_access(& me->fifo, me->fifo_pc); + if(fqw == NULL) return; /* skip over DMA tag, if present */ pke_pc_advance(me, 0); + /* note: this can only change qw_pc from 0 to 2 and will not + invalidate fqw */ /* "fetch" instruction quadword and word */ - fqw = & me->fifo[me->fifo_pc]; fw = fqw->data[me->qw_pc]; /* store word in PKECODE register */ @@ -552,11 +583,9 @@ pke_issue(SIM_DESC sd, struct pke_device* me) /* check for stall/halt control bits */ if(PKE_REG_MASK_GET(me, STAT, PFS) || PKE_REG_MASK_GET(me, STAT, PSS) || /* note special treatment below */ - /* PEW bit not a reason to keep stalling - it's re-checked below */ - /* PGW bit not a reason to keep stalling - it's re-checked below */ - /* maskable stall controls: ER0, ER1, PIS */ - PKE_REG_MASK_GET(me, STAT, ER0) || - PKE_REG_MASK_GET(me, STAT, ER1) || + /* PEW bit not a reason to keep stalling - it's just an indication, re-computed below */ + /* PGW bit not a reason to keep stalling - it's just an indication, re-computed below */ + /* ER0/ER1 not a reason to keep stalling - it's just an indication */ PKE_REG_MASK_GET(me, STAT, PIS)) { /* (still) stalled */ @@ -591,8 +620,6 @@ pke_issue(SIM_DESC sd, struct pke_device* me) } else /* new interrupt-flagged instruction */ { - /* XXX: send interrupt to 5900? */ - /* set INT flag in STAT register */ PKE_REG_MASK_SET(me, STAT, INT, 1); /* set loop-prevention flag */ @@ -600,7 +627,7 @@ pke_issue(SIM_DESC sd, struct pke_device* me) /* set PIS if stall not masked */ if(!PKE_REG_MASK_GET(me, ERR, MII)) - PKE_REG_MASK_SET(me, STAT, PIS, 1); + pke_begin_interrupt_stall(me); /* suspend this instruction unless it's PKEMARK */ if(!IS_PKE_CMD(cmd, PKEMARK)) @@ -666,6 +693,142 @@ pke_issue(SIM_DESC sd, struct pke_device* me) +/* Clear out contents of FIFO; act as if it was empty. Return PC + pointing to one-past-last word. */ + +unsigned_4 +pke_fifo_flush(struct pke_fifo* fifo) +{ + /* don't modify any state! */ + return fifo->origin + fifo->next; +} + + + +/* Clear out contents of FIFO; make it really empty. */ + +void +pke_fifo_reset(struct pke_fifo* fifo) +{ + int i; + + /* clear fifo quadwords */ + for(i=0; inext; i++) + { + zfree(fifo->quadwords[i]); + fifo->quadwords[i] = NULL; + } + + /* reset pointers */ + fifo->origin = 0; + fifo->next = 0; +} + + + +/* Make space for the next quadword in the FIFO. Allocate/enlarge + FIFO pointer block if necessary. Return a pointer to it. */ + +struct fifo_quadword* +pke_fifo_fit(struct pke_fifo* fifo) +{ + struct fifo_quadword* fqw; + + /* out of space on quadword pointer array? */ + if(fifo->next == fifo->length) /* also triggered before fifo->quadwords allocated */ + { + struct fifo_quadword** new_qw; + unsigned_4 new_length = fifo->length + PKE_FIFO_GROW_SIZE; + + /* allocate new pointer block */ + new_qw = zalloc(new_length * sizeof(struct fifo_quadword*)); + ASSERT(new_qw != NULL); + + /* copy over old contents, if any */ + if(fifo->quadwords != NULL) + { + /* copy over old pointers to beginning of new block */ + memcpy(new_qw, fifo->quadwords, + fifo->length * sizeof(struct fifo_quadword*)); + + /* free old block */ + zfree(fifo->quadwords); + } + + /* replace pointers & counts */ + fifo->quadwords = new_qw; + fifo->length = new_length; + } + + /* sanity check */ + ASSERT(fifo->quadwords != NULL); + + /* allocate new quadword from heap */ + fqw = zalloc(sizeof(struct fifo_quadword)); + ASSERT(fqw != NULL); + + /* push quadword onto fifo */ + fifo->quadwords[fifo->next] = fqw; + fifo->next++; + return fqw; +} + + + +/* Return a pointer to the FIFO quadword with given absolute index, or + NULL if it is out of range */ + +struct fifo_quadword* +pke_fifo_access(struct pke_fifo* fifo, unsigned_4 qwnum) +{ + struct fifo_quadword* fqw; + + if((qwnum < fifo->origin) || /* before history */ + (qwnum >= fifo->origin + fifo->next)) /* after last available quadword */ + fqw = NULL; + else + { + ASSERT(fifo->quadwords != NULL); /* must be allocated already */ + fqw = fifo->quadwords[qwnum - fifo->origin]; /* pull out pointer from array */ + ASSERT(fqw != NULL); /* must be allocated already */ + } + + return fqw; +} + + +/* Authorize release of any FIFO entries older than given absolute quadword. */ +void +pke_fifo_old(struct pke_fifo* fifo, unsigned_4 qwnum) +{ + /* do we have any too-old FIFO elements? */ + if(fifo->origin + PKE_FIFO_ARCHEOLOGY < qwnum) + { + /* count quadwords to forget */ + int horizon = qwnum - (fifo->origin + PKE_FIFO_ARCHEOLOGY); + int i; + + /* free quadwords at indices below horizon */ + for(i=0; i < horizon; i++) + zfree(fifo->quadwords[i]); + + /* move surviving quadword pointers down to beginning of array */ + for(i=horizon; i < fifo->next; i++) + fifo->quadwords[i-horizon] = fifo->quadwords[i]; + + /* clear duplicate pointers */ + for(i=fifo->next - horizon; i < fifo->next; i++) + fifo->quadwords[i] = NULL; + + /* adjust FIFO pointers */ + fifo->origin = fifo->origin + horizon; + fifo->next = fifo->next - horizon; + } +} + + + + /* advance the PC by given number of data words; update STAT/FQC field; assume FIFO is filled enough; classify passed-over words; write FIFO trace line */ @@ -675,16 +838,19 @@ pke_pc_advance(struct pke_device* me, int num_words) { int num = num_words; struct fifo_quadword* fq = NULL; + unsigned_4 old_fifo_pc = me->fifo_pc; + ASSERT(num_words >= 0); /* printf("pke %d pc_advance num_words %d\n", me->pke_number, num_words); */ while(1) { - fq = & me->fifo[me->fifo_pc]; + /* find next quadword, if any */ + fq = pke_fifo_access(& me->fifo, me->fifo_pc); /* skip over DMA tag words if present in word 0 or 1 */ - if(fq->word_class[me->qw_pc] == wc_dma) + if(fq != NULL && fq->word_class[me->qw_pc] == wc_dma) { /* skip by going around loop an extra time */ num ++; @@ -694,6 +860,9 @@ pke_pc_advance(struct pke_device* me, int num_words) if(num == 0) break; + /* we are supposed to skip existing words */ + ASSERT(fq != NULL); + /* one word skipped */ num --; @@ -724,46 +893,67 @@ pke_pc_advance(struct pke_device* me, int num_words) fq->word_class[3], fq->word_class[2], fq->word_class[1], fq->word_class[0]); } - - /* XXX: zap old entries in FIFO */ } /* next quadword */ } + /* age old entries before PC */ + if(me->fifo_pc != old_fifo_pc) + { + /* we advanced the fifo-pc; authorize disposal of anything + before previous PKEcode */ + pke_fifo_old(& me->fifo, old_fifo_pc); + } + /* clear FQC if FIFO is now empty */ - if(me->fifo_num_elements == me->fifo_pc) + fq = pke_fifo_access(& me->fifo, me->fifo_pc); + if(fq == NULL) { PKE_REG_MASK_SET(me, STAT, FQC, 0); } else /* annote the word where the PC lands as an PKEcode */ { - fq = & me->fifo[me->fifo_pc]; - ASSERT(fq->word_class[me->qw_pc] == wc_pkecode || - fq->word_class[me->qw_pc] == wc_unknown); + ASSERT(fq->word_class[me->qw_pc] == wc_pkecode || fq->word_class[me->qw_pc] == wc_unknown); fq->word_class[me->qw_pc] = wc_pkecode; } } + + /* Return pointer to FIFO quadword containing given operand# in FIFO. `operand_num' starts at 1. Return pointer to operand word in last argument, if non-NULL. If FIFO is not full enough, return 0. Signal an ER0 indication upon skipping a DMA tag. */ struct fifo_quadword* -pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) +pke_pcrel_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) { - int num = operand_num; + int num; int new_qw_pc, new_fifo_pc; struct fifo_quadword* fq = NULL; - ASSERT(num > 0); + /* check for validity of last search results in cache */ + if(me->last_fifo_pc == me->fifo_pc && + me->last_qw_pc == me->qw_pc && + operand_num > me->last_num) + { + /* continue search from last stop */ + new_fifo_pc = me->last_new_fifo_pc; + new_qw_pc = me->last_new_qw_pc; + num = operand_num - me->last_num; + } + else + { + /* start search from scratch */ + new_fifo_pc = me->fifo_pc; + new_qw_pc = me->qw_pc; + num = operand_num; + } - /* snapshot current pointers */ - new_fifo_pc = me->fifo_pc; - new_qw_pc = me->qw_pc; + ASSERT(num > 0); - /* printf("pke %d pc_fifo operand_num %d\n", me->pke_number, operand_num); */ + /* printf("pke %d pcrel_fifo operand_num %d\n", me->pke_number, operand_num); */ do { @@ -778,21 +968,22 @@ pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) new_fifo_pc ++; } + fq = pke_fifo_access(& me->fifo, new_fifo_pc); + /* check for FIFO underflow */ - if(me->fifo_num_elements == new_fifo_pc) - { - fq = NULL; - break; - } + if(fq == NULL) + break; /* skip over DMA tag words if present in word 0 or 1 */ - fq = & me->fifo[new_fifo_pc]; if(fq->word_class[new_qw_pc] == wc_dma) { + /* set ER0 */ + PKE_REG_MASK_SET(me, STAT, ER0, 1); + /* mismatch error! */ if(! PKE_REG_MASK_GET(me, ERR, ME0)) { - PKE_REG_MASK_SET(me, STAT, ER0, 1); + pke_begin_interrupt_stall(me); /* don't stall just yet -- finish this instruction */ /* the PPS_STALL state will be entered by pke_issue() next time */ } @@ -807,10 +998,18 @@ pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) { *operand = & fq->data[new_qw_pc]; - /* annote the word where the pseudo lands as an PKE operand */ - ASSERT(fq->word_class[new_qw_pc] == wc_pkedata || - fq->word_class[new_qw_pc] == wc_unknown); + /* annote the word where the pseudo-PC lands as an PKE operand */ + ASSERT(fq->word_class[new_qw_pc] == wc_pkedata || fq->word_class[new_qw_pc] == wc_unknown); fq->word_class[new_qw_pc] = wc_pkedata; + + /* store search results in cache */ + /* keys */ + me->last_fifo_pc = me->fifo_pc; + me->last_qw_pc = me->qw_pc; + /* values */ + me->last_num = operand_num; + me->last_new_fifo_pc = new_fifo_pc; + me->last_new_qw_pc = new_qw_pc; } return fq; @@ -822,15 +1021,15 @@ pke_pc_fifo(struct pke_device* me, int operand_num, unsigned_4** operand) them as an error (ER0). */ unsigned_4* -pke_pc_operand(struct pke_device* me, int operand_num) +pke_pcrel_operand(struct pke_device* me, int operand_num) { unsigned_4* operand = NULL; struct fifo_quadword* fifo_operand; - fifo_operand = pke_pc_fifo(me, operand_num, & operand); + fifo_operand = pke_pcrel_fifo(me, operand_num, & operand); if(fifo_operand == NULL) - ASSERT(operand == NULL); /* pke_pc_fifo() ought leave it untouched */ + ASSERT(operand == NULL); /* pke_pcrel_fifo() ought leave it untouched */ return operand; } @@ -842,7 +1041,7 @@ pke_pc_operand(struct pke_device* me, int operand_num) enough. Skip over DMA tags, but mark them as an error (ER0). */ unsigned_4 -pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr) +pke_pcrel_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsigned_4* source_addr) { unsigned_4* word = NULL; unsigned_4 value; @@ -853,7 +1052,7 @@ pke_pc_operand_bits(struct pke_device* me, int bit_offset, int bit_width, unsign bitnumber = bit_offset%32; /* find operand word with bitfield */ - fifo_operand = pke_pc_fifo(me, wordnumber + 1, &word); + fifo_operand = pke_pcrel_fifo(me, wordnumber + 1, &word); ASSERT(word != NULL); /* extract bitfield from word */ @@ -894,16 +1093,19 @@ pke_check_stall(struct pke_device* me, enum pke_check_target what) } else if(what == chk_path1) /* VU -> GPUIF */ { + ASSERT(me->pke_number == 1); if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 1) any_stall = 1; } else if(what == chk_path2) /* PKE -> GPUIF */ { + ASSERT(me->pke_number == 1); if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 2) any_stall = 1; } else if(what == chk_path3) /* DMA -> GPUIF */ { + ASSERT(me->pke_number == 1); if(BIT_MASK_GET(gpuif_stat, GPUIF_REG_STAT_APATH_B, GPUIF_REG_STAT_APATH_E) == 3) any_stall = 1; } @@ -942,6 +1144,18 @@ pke_flip_dbf(struct pke_device* me) } +/* set the STAT:PIS bit and send an interrupt to the 5900 */ +void +pke_begin_interrupt_stall(struct pke_device* me) +{ + /* set PIS */ + PKE_REG_MASK_SET(me, STAT, PIS, 1); + + /* XXX: send interrupt to 5900? */ +} + + + /* PKEcode handler functions -- responsible for checking and confirming old stall conditions, executing pkecode, updating PC and @@ -1036,12 +1250,12 @@ pke_code_mskpath3(struct pke_device* me, unsigned_4 pkecode) /* set appropriate bit */ if(BIT_MASK_GET(imm, PKE_REG_MSKPATH3_B, PKE_REG_MSKPATH3_E) != 0) - gif_mode = GIF_REG_MODE_M3R_MASK; + gif_mode = GIF_REG_STAT_M3P; else gif_mode = 0; - /* write register; patrickm code will look at M3R bit only */ - PKE_MEM_WRITE(me, GIF_REG_MODE, & gif_mode, 4); + /* write register to "read-only" register; gpuif code will look at M3P bit only */ + PKE_MEM_WRITE(me, GIF_REG_VIF_M3P, & gif_mode, 4); /* done */ pke_pc_advance(me, 1); @@ -1316,7 +1530,7 @@ pke_code_stmask(struct pke_device* me, unsigned_4 pkecode) unsigned_4* mask; /* check that FIFO has one more word for STMASK operand */ - mask = pke_pc_operand(me, 1); + mask = pke_pcrel_operand(me, 1); if(mask != NULL) { /* "transferring" operand */ @@ -1350,7 +1564,7 @@ pke_code_strow(struct pke_device* me, unsigned_4 pkecode) /* check that FIFO has four more words for STROW operand */ unsigned_4* last_op; - last_op = pke_pc_operand(me, 4); + last_op = pke_pcrel_operand(me, 4); if(last_op != NULL) { /* "transferring" operand */ @@ -1360,10 +1574,10 @@ pke_code_strow(struct pke_device* me, unsigned_4 pkecode) PKE_REG_MASK_SET(me, NUM, NUM, 1); /* copy ROW registers: must all exist if 4th operand exists */ - me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1); - me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2); - me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3); - me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4); + me->regs[PKE_REG_R0][0] = * pke_pcrel_operand(me, 1); + me->regs[PKE_REG_R1][0] = * pke_pcrel_operand(me, 2); + me->regs[PKE_REG_R2][0] = * pke_pcrel_operand(me, 3); + me->regs[PKE_REG_R3][0] = * pke_pcrel_operand(me, 4); /* set NUM */ PKE_REG_MASK_SET(me, NUM, NUM, 0); @@ -1387,7 +1601,7 @@ pke_code_stcol(struct pke_device* me, unsigned_4 pkecode) /* check that FIFO has four more words for STCOL operand */ unsigned_4* last_op; - last_op = pke_pc_operand(me, 4); + last_op = pke_pcrel_operand(me, 4); if(last_op != NULL) { /* "transferring" operand */ @@ -1397,10 +1611,10 @@ pke_code_stcol(struct pke_device* me, unsigned_4 pkecode) PKE_REG_MASK_SET(me, NUM, NUM, 1); /* copy COL registers: must all exist if 4th operand exists */ - me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1); - me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2); - me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3); - me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4); + me->regs[PKE_REG_C0][0] = * pke_pcrel_operand(me, 1); + me->regs[PKE_REG_C1][0] = * pke_pcrel_operand(me, 2); + me->regs[PKE_REG_C2][0] = * pke_pcrel_operand(me, 3); + me->regs[PKE_REG_C3][0] = * pke_pcrel_operand(me, 4); /* set NUM */ PKE_REG_MASK_SET(me, NUM, NUM, 0); @@ -1433,7 +1647,7 @@ pke_code_mpg(struct pke_device* me, unsigned_4 pkecode) if(num==0) num=0x100; /* check that FIFO has a few more words for MPG operand */ - last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */ + last_mpg_word = pke_pcrel_operand(me, num*2); /* num: number of 64-bit words */ if(last_mpg_word != NULL) { /* perform implied FLUSHE */ @@ -1488,9 +1702,9 @@ pke_code_mpg(struct pke_device* me, unsigned_4 pkecode) vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 2; /* Fetch operand words; assume they are already little-endian for VU imem */ - fq = pke_pc_fifo(me, i*2 + 1, & operand); + fq = pke_pcrel_fifo(me, i*2 + 1, & operand); vu_lower_opcode = *operand; - vu_upper_opcode = *pke_pc_operand(me, i*2 + 2); + vu_upper_opcode = *pke_pcrel_operand(me, i*2 + 2); /* write data into VU memory */ /* lower (scalar) opcode comes in first word ; macro performs H2T! */ @@ -1541,7 +1755,7 @@ pke_code_direct(struct pke_device* me, unsigned_4 pkecode) /* map zero to max+1 */ if(imm==0) imm=0x10000; - last_direct_word = pke_pc_operand(me, imm*4); /* imm: number of 128-bit words */ + last_direct_word = pke_pcrel_operand(me, imm*4); /* imm: number of 128-bit words */ if(last_direct_word != NULL) { /* VU idle */ @@ -1554,11 +1768,11 @@ pke_code_direct(struct pke_device* me, unsigned_4 pkecode) /* transfer GPUIF quadwords, one word per iteration */ for(i=0; i= addrwl) n = num; else - n = cl * (num/wl) + PKE_LIMIT(num % wl, cl); + n = cl * (nummx / addrwl) + PKE_LIMIT(nummx % addrwl, cl); num_operands = (31 + (32 >> vl) * (vn+1) * n)/32; /* round up to next word */ /* confirm that FIFO has enough words in it */ if(num_operands > 0) - last_operand_word = pke_pc_operand(me, num_operands); + last_operand_word = pke_pcrel_operand(me, num_operands); if(last_operand_word != NULL || num_operands == 0) { address_word vu_addr_base, vutrack_addr_base; @@ -1645,7 +1867,7 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) } /* set NUM */ - PKE_REG_MASK_SET(me, NUM, NUM, num == 0 ? 0x100 : num ); + PKE_REG_MASK_SET(me, NUM, NUM, nummx); /* transfer given number of vectors */ vector_num_out = 0; /* output vector number being processed */ @@ -1669,7 +1891,6 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) if(cl >= wl) { /* map zero to max+1 */ - int addrwl = (wl == 0) ? 0x0100 : wl; vu_addr = vu_addr_base + 16 * (BIT_MASK_GET(imm, 0, 9) + (vector_num_out / addrwl) * cl + (vector_num_out % addrwl)); @@ -1703,8 +1924,8 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) /* For cyclic unpack, next operand quadword may come from instruction stream or be zero. */ - if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */ - ((cl < wl) && ((vector_num_out % wl) >= cl))) /* && short-circuit asserts wl != 0 */ + if((cl < addrwl) && + (vector_num_out % addrwl) >= cl) { /* clear operand - used only in a "indeterminate" state */ for(i = 0; i < 4; i++) @@ -1713,7 +1934,7 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) else { /* compute packed vector dimensions */ - int vectorbits, unitbits; + int vectorbits = 0, unitbits = 0; if(vl < 3) /* PKE_UNPACK_*_{32,16,8} */ { @@ -1727,9 +1948,8 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) } else /* illegal unpack variant */ { - /* treat as illegal instruction */ - pke_code_error(me, pkecode); - return; + /* should have been caught at top of function */ + ASSERT(0); } /* loop over columns */ @@ -1745,8 +1965,19 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) if(vl == 3 && vn == 3 && i == 3) /* PKE_UNPACK_V4_5 */ unitbits = 1; + /* confirm we're not reading more than we said we needed */ + if(vector_num_in * vectorbits >= num_operands * 32) + { + /* this condition may be triggered by illegal + PKEcode / CYCLE combinations. */ + pke_code_error(me, pkecode); + /* XXX: this case needs to be better understood, + and detected at a better time. */ + return; + } + /* fetch bitfield operand */ - operand = pke_pc_operand_bits(me, bitoffset, unitbits, & source_addr); + operand = pke_pcrel_operand_bits(me, bitoffset, unitbits, & source_addr); /* selectively sign-extend; not for V4_5 1-bit value */ if(usn || unitbits == 1) @@ -1755,37 +1986,54 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) unpacked_data[i] = SEXT32(operand, unitbits-1); } - /* clear remaining top words in vector */ - for(; i<4; i++) - unpacked_data[i] = 0; + /* set remaining top words in vector */ + for(i=vn+1; i<4; i++) + { + if(vn == 0) /* S_{32,16,8}: copy lowest element */ + unpacked_data[i] = unpacked_data[0]; + else + unpacked_data[i] = 0; + } /* consumed a vector from the PKE instruction stream */ vector_num_in ++; } /* unpack word from instruction operand */ + /* process STMOD register for accumulation operations */ + switch(PKE_REG_MASK_GET(me, MODE, MDE)) + { + case PKE_MODE_ADDROW: /* add row registers to output data */ + case PKE_MODE_ACCROW: /* same .. later conditionally accumulate */ + for(i=0; i<4; i++) + /* exploit R0..R3 contiguity */ + unpacked_data[i] += me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MODE_INPUT: /* pass data through */ + default: /* specified as undefined */ + ; + } + /* compute replacement word */ if(m) /* use mask register? */ { /* compute index into mask register for this word */ - int addrwl = (wl == 0) ? 0x0100 : wl; int mask_index = PKE_LIMIT(vector_num_out % addrwl, 3); for(i=0; i<4; i++) /* loop over columns */ { int mask_op = PKE_MASKREG_GET(me, mask_index, i); unsigned_4* masked_value = NULL; - unsigned_4 zero = 0; switch(mask_op) { case PKE_MASKREG_INPUT: - /* for vn == 0, all columns are copied from column 0 */ - if(vn == 0) - masked_value = & unpacked_data[0]; - else if(i > vn) - masked_value = & zero; /* arbitrary data: undefined in spec */ - else - masked_value = & unpacked_data[i]; + masked_value = & unpacked_data[i]; + + /* conditionally accumulate */ + if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) + me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; + break; case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ @@ -1814,29 +2062,11 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) { /* no mask - just copy over entire unpacked quadword */ memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); - } - - /* process STMOD register for accumulation operations */ - switch(PKE_REG_MASK_GET(me, MODE, MDE)) - { - case PKE_MODE_ADDROW: /* add row registers to output data */ - for(i=0; i<4; i++) - /* exploit R0..R3 contiguity */ - vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; - break; - - case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */ - for(i=0; i<4; i++) - { - /* exploit R0..R3 contiguity */ - vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; - me->regs[PKE_REG_R0 + i][0] = vu_new_data[i]; - } - break; - case PKE_MODE_INPUT: /* pass data through */ - default: - ; + /* conditionally store accumulated row results */ + if(PKE_REG_MASK_GET(me, MODE, MDE) == PKE_MODE_ACCROW) + for(i=0; i<4; i++) + me->regs[PKE_REG_R0 + i][0] = unpacked_data[i]; } /* write new VU data word at address; reverse words if needed */ @@ -1862,6 +2092,9 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) } /* vector transfer loop */ while(PKE_REG_MASK_GET(me, NUM, NUM) > 0); + /* confirm we've written as many vectors as told */ + ASSERT(nummx == vector_num_out); + /* done */ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_IDLE); pke_pc_advance(me, 1 + num_operands); @@ -1878,10 +2111,12 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode) void pke_code_error(struct pke_device* me, unsigned_4 pkecode) { + /* set ER1 flag in STAT register */ + PKE_REG_MASK_SET(me, STAT, ER1, 1); + if(! PKE_REG_MASK_GET(me, ERR, ME1)) { - /* set ER1 flag in STAT register */ - PKE_REG_MASK_SET(me, STAT, ER1, 1); + pke_begin_interrupt_stall(me); PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_STALL); } else