+ case ENCODE_RELAX (STATE_COMPLEX_BRANCH, STATE_WORD):
+ fix_new (fragP, fragP->fr_fix, 2, fragP->fr_symbol,
+ fragP->fr_offset, 1, NO_RELOC);
+ extension = 2;
+ break;
+
+ case ENCODE_RELAX (STATE_COMPLEX_BRANCH, STATE_LONG):
+ addressP[0] = 2;
+ addressP[1] = 0;
+ addressP[2] = VAX_BRB;
+ addressP[3] = 6;
+ addressP[4] = VAX_JMP;
+ addressP[5] = VAX_PC_RELATIVE_MODE;
+ fix_new (fragP, fragP->fr_fix + 6, 4, fragP->fr_symbol,
+ fragP->fr_offset, 1, NO_RELOC);
+ extension = 10;
+ break;
+
+ case ENCODE_RELAX (STATE_COMPLEX_HOP, STATE_BYTE):
+ fix_new (fragP, fragP->fr_fix, 1, fragP->fr_symbol,
+ fragP->fr_offset, 1, NO_RELOC);
+ extension = 1;
+ break;
+
+ case ENCODE_RELAX (STATE_COMPLEX_HOP, STATE_WORD):
+ addressP[0] = 2;
+ addressP[1] = VAX_BRB;
+ addressP[2] = 3;
+ addressP[3] = VAX_BRW;
+ fix_new (fragP, fragP->fr_fix + 4, 2, fragP->fr_symbol,
+ fragP->fr_offset, 1, NO_RELOC);
+ extension = 6;
+ break;
+
+ case ENCODE_RELAX (STATE_COMPLEX_HOP, STATE_LONG):
+ addressP[0] = 2;
+ addressP[1] = VAX_BRB;
+ addressP[2] = 6;
+ addressP[3] = VAX_JMP;
+ addressP[4] = VAX_PC_RELATIVE_MODE;
+ fix_new (fragP, fragP->fr_fix + 5, 4, fragP->fr_symbol,
+ fragP->fr_offset, 1, NO_RELOC);
+ extension = 9;
+ break;
+
+ default:
+ BAD_CASE (fragP->fr_subtype);
+ break;
+ }
+ fragP->fr_fix += extension;
+}
+
+/* Translate internal format of relocation info into target format.
+
+ On vax: first 4 bytes are normal unsigned long, next three bytes
+ are symbolnum, least sig. byte first. Last byte is broken up with
+ the upper nibble as nuthin, bit 3 as extern, bits 2 & 1 as length, and
+ bit 0 as pcrel. */
+#ifdef comment
+ void
+ md_ri_to_chars (char *the_bytes, struct reloc_info_generic ri)
+ {
+   int flag_nibble;
+
+   /* The address field goes first, through the ordinary number writer.  */
+   md_number_to_chars (the_bytes, ri.r_address, sizeof (ri.r_address));
+
+   /* Symbol number: three bytes, least significant byte first.  */
+   the_bytes[4] = ri.r_symbolnum & 0x0ff;
+   the_bytes[5] = (ri.r_symbolnum >> 8) & 0x0ff;
+   the_bytes[6] = (ri.r_symbolnum >> 16) & 0x0ff;
+
+   /* Last byte: bit 3 is extern, bits 2 & 1 are the length, bit 0 is
+      pcrel; the upper nibble stays clear.  */
+   flag_nibble = (ri.r_extern << 3) & 0x08;
+   flag_nibble |= (ri.r_length << 1) & 0x06;
+   flag_nibble |= (ri.r_pcrel << 0) & 0x01;
+   the_bytes[7] = flag_nibble & 0x0F;
+ }
+
+#endif /* comment */
+
+/* BUGS, GRIPES, APOLOGIA, etc.
+
+ The opcode table 'votstrs' needs to be sorted on opcode frequency.
+ That is, AFTER we hash it with hash_...(), we want most-used opcodes
+ to come out of the hash table faster.
+
+ I am sorry to inflict yet another VAX assembler on the world, but
+ RMS says we must do everything from scratch, to prevent pin-heads
+ restricting this software.
+
+ This is a vaguely modular set of routines in C to parse VAX
+ assembly code using DEC mnemonics. It is NOT un*x specific.
+
+ The idea here is that the assembler has taken care of all:
+ labels
+ macros
+ listing
+ pseudo-ops
+ line continuation
+ comments
+ condensing any whitespace down to exactly one space
+ and all we have to do is parse 1 line into a vax instruction
+ partially formed. We will accept a line, and deliver:
+ an error message (hopefully empty)
+ a skeleton VAX instruction (tree structure)
+ textual pointers to all the operand expressions
+ a warning message that notes a silly operand (hopefully empty)
+
+ E D I T H I S T O R Y
+
+ 17may86 Dean Elsner. Bug if line ends immediately after opcode.
+ 30apr86 Dean Elsner. New vip_op() uses arg block so change call.
+ 6jan86 Dean Elsner. Crock vip_begin() to call vip_op_defaults().
+ 2jan86 Dean Elsner. Invent synthetic opcodes.
+ Widen vax_opcodeT to 32 bits. Use a bit for VIT_OPCODE_SYNTHETIC,
+ which means this is not a real opcode, it is like a macro; it will
+ be relax()ed into 1 or more instructions.
+ Use another bit for VIT_OPCODE_SPECIAL if the op-code is not optimised
+ like a regular branch instruction. Option added to vip_begin():
+ exclude synthetic opcodes. Invent synthetic_votstrs[].
+ 31dec85 Dean Elsner. Invent vit_opcode_nbytes.
+ Also make vit_opcode into a char[]. We now have n-byte vax opcodes,
+ so callers don't have to know the difference between a 1-byte & a
+ 2-byte op-code. Still need vax_opcodeT concept, so we know how
+ big an object must be to hold an op-code.
+ 30dec85 Dean Elsner. Widen typedef vax_opcodeT in "vax-inst.h"
+ because vax opcodes may be 16 bits. Our crufty C compiler was
+ happily initialising 8-bit vot_codes with 16-bit numbers!
+ (Wouldn't the 'phone company like to compress data so easily!)
+ 29dec85 Dean Elsner. New static table vax_operand_width_size[].
+ Invented so we know how many bytes a "I^#42" needs in its immediate
+ operand. Revised struct vop in "vax-inst.h": explicitly include
+ byte length of each operand, and its letter-code datum type.
+ 17nov85 Dean Elsner. Name Change.
+ Due to ar(1) truncating names, we learned the hard way that
+ "vax-inst-parse.c" -> "vax-inst-parse." dropping the "o" off
+ the archived object name. SO... we shortened the name of this
+ source file, and changed the makefile. */
+
+ /* Handle of the OPCODE hash table. Created by vip_begin () with
+ hash_new () and filled from votstrs (plus synthetic_votstrs on
+ request); each value stored is a pointer to the entry's vot_detail. */
+ static struct hash_control *op_hash;
+
+ /* In: 1 character, from "bdfghloqpw" being the data-type of an operand
+ of a vax instruction.
+
+ Out: the length of an operand of that type, in bytes.
+ Special branch operands types "-?!" have length 0.
+
+ The table is indexed by character code. The upper- and lower-case
+ forms of each letter carry the same length, and every character
+ without an explicit entry yields 0.
+ NOTE(review): 'p' is named in the list above but its slots hold 0 --
+ confirm that this is intended. */
+
+ static const short int vax_operand_width_size[256] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 8, 0, 4, 8, 16, 0, 0, 0, 4, 0, 0,16, /* ..b.d.fgh...l..o */
+ 0, 8, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, /* .q.....w........ */
+ 0, 0, 1, 0, 8, 0, 4, 8, 16, 0, 0, 0, 4, 0, 0,16, /* ..b.d.fgh...l..o */
+ 0, 8, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, /* .q.....w........ */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+\f
+/* This perversion encodes all the vax opcodes as a bunch of strings.
+ RMS says we should build our hash-table at run-time. Hmm.
+ Please would someone arrange these in decreasing frequency of opcode?
+ Because of the way hash_...() works, the most frequently used opcode
+ should be textually first and so on.
+
+ Input for this table was 'vax.opcodes', awk(1)ed by 'vax.opcodes.c.awk' .
+ So change 'vax.opcodes', then re-generate this table. */
+
+#include "opcode/vax.h"
+\f
+/* This is a table of optional op-codes. All of them represent
+ 'synthetic' instructions that seem popular.
+
+ Here we make some pseudo op-codes. Every code has a bit set to say
+ it is synthetic. This lets you catch them if you want to
+ ban these opcodes. They are mnemonics for "elastic" instructions
+ that are supposed to assemble into the fewest bytes needed to do a
+ branch, or to do a conditional branch, or whatever.
+
+ The opcode is in the usual place [low-order n*8 bits]. This means
+ that if you mask off the bucky bits, the usual rules apply about
+ how long the opcode is.
+
+ All VAX branch displacements come at the end of the instruction.
+ For simple branches (1-byte opcode + 1-byte displacement) the last
+ operand is coded 'b?' where the "data type" '?' is a clue that we
+ may reverse the sense of the branch (complement lowest order bit)
+ and branch around a jump. This is by far the most common case.
+ That is why the VIT_OPCODE_SYNTHETIC bit is set: it says this is
+ a 0-byte op-code followed by 2 or more bytes of operand address.
+
+ If the op-code has VIT_OPCODE_SPECIAL set, then we have a more unusual
+ case.
+
+ For JBSB & JBR the treatment is similar, except (1) we have a 'bw'
+ option before (2) we can directly JSB/JMP because there is no condition.
+ These operands have 'b-' as their access/data type.
+
+ That leaves a bunch of random opcodes: JACBx, JxOBxxx. In these
+ cases, we do the same idea. JACBxxx are all marked with a 'b!'
+ JAOBxxx & JSOBxxx are marked with a 'b:'. */
+#if (VIT_OPCODE_SYNTHETIC != 0x80000000)
+#error "You have just broken the encoding below, which assumes the sign bit means 'I am an imaginary instruction'."
+#endif
+
+#if (VIT_OPCODE_SPECIAL != 0x40000000)
+#error "You have just broken the encoding below, which assumes the 0x40 M bit means 'I am not to be "optimised" the way normal branches are'."
+#endif
+
+ static const struct vot
+ synthetic_votstrs[] =
+ { /* Each entry: mnemonic, then { operand codes, opcode with flag bits }. */
+ {"jbsb", {"b-", 0xC0000010}}, /* BSD 4.2; 0xC...: VIT_OPCODE_SYNTHETIC | VIT_OPCODE_SPECIAL */
+ /* jsb used already */
+ {"jbr", {"b-", 0xC0000011}}, /* BSD 4.2 */
+ {"jr", {"b-", 0xC0000011}}, /* consistent */
+ {"jneq", {"b?", 0x80000012}}, /* 0x8...: VIT_OPCODE_SYNTHETIC only */
+ {"jnequ", {"b?", 0x80000012}},
+ {"jeql", {"b?", 0x80000013}},
+ {"jeqlu", {"b?", 0x80000013}},
+ {"jgtr", {"b?", 0x80000014}},
+ {"jleq", {"b?", 0x80000015}},
+ /* un-used opcodes here */
+ {"jgeq", {"b?", 0x80000018}},
+ {"jlss", {"b?", 0x80000019}},
+ {"jgtru", {"b?", 0x8000001a}},
+ {"jlequ", {"b?", 0x8000001b}},
+ {"jvc", {"b?", 0x8000001c}},
+ {"jvs", {"b?", 0x8000001d}},
+ {"jgequ", {"b?", 0x8000001e}},
+ {"jcc", {"b?", 0x8000001e}},
+ {"jlssu", {"b?", 0x8000001f}},
+ {"jcs", {"b?", 0x8000001f}},
+
+ {"jacbw", {"rwrwmwb!", 0xC000003d}}, /* JACBx: last operand is 'b!' */
+ {"jacbf", {"rfrfmfb!", 0xC000004f}},
+ {"jacbd", {"rdrdmdb!", 0xC000006f}},
+ {"jacbb", {"rbrbmbb!", 0xC000009d}},
+ {"jacbl", {"rlrlmlb!", 0xC00000f1}},
+ {"jacbg", {"rgrgmgb!", 0xC0004ffd}},
+ {"jacbh", {"rhrhmhb!", 0xC0006ffd}},
+
+ {"jbs", {"rlvbb?", 0x800000e0}}, /* last operand is the simple 'b?' form */
+ {"jbc", {"rlvbb?", 0x800000e1}},
+ {"jbss", {"rlvbb?", 0x800000e2}},
+ {"jbcs", {"rlvbb?", 0x800000e3}},
+ {"jbsc", {"rlvbb?", 0x800000e4}},
+ {"jbcc", {"rlvbb?", 0x800000e5}},
+ {"jbssi", {"rlvbb?", 0x800000e6}},
+ {"jbcci", {"rlvbb?", 0x800000e7}},
+ {"jlbs", {"rlb?", 0x800000e8}},
+ {"jlbc", {"rlb?", 0x800000e9}},
+
+ {"jaoblss", {"rlmlb:", 0xC00000f2}}, /* JAOBxxx & JSOBxxx: last operand is 'b:' */
+ {"jaobleq", {"rlmlb:", 0xC00000f3}},
+ {"jsobgeq", {"mlb:", 0xC00000f4}},
+ {"jsobgtr", {"mlb:", 0xC00000f5}},
+
+ /* CASEx has no branch addresses in our conception of it. */
+ /* You should use ".word ..." statements after the "case ...". */
+
+ {"", {"", 0}} /* Empty is end sentinel. */
+ };
+\f
+/* Because this module is useful for both VMS and UN*X style assemblers
+ and because of the variety of UN*X assemblers we must recognise
+ the different conventions for assembler operand notation. For example
+ VMS says "#42" for immediate mode, while most UN*X say "$42".
+ We permit arbitrary sets of (single) characters to represent the
+ 3 concepts that DEC writes '#', '@', '^'. */
+
+ /* Character tests. Each VIP_* value is one bit in an entry of
+ vip_metacharacters[]; the *P macros mask the character to 0..255
+ before indexing and yield non-zero when that bit is set. */
+ #define VIP_IMMEDIATE 01 /* Character is like DEC # */
+ #define VIP_INDIRECT 02 /* Char is like DEC @ */
+ #define VIP_DISPLEN 04 /* Char is like DEC ^ */
+
+ #define IMMEDIATEP(c) (vip_metacharacters [(c) & 0xff] & VIP_IMMEDIATE)
+ #define INDIRECTP(c) (vip_metacharacters [(c) & 0xff] & VIP_INDIRECT)
+ #define DISPLENP(c) (vip_metacharacters [(c) & 0xff] & VIP_DISPLEN)
+
+ /* We assume 8 bits per byte. Use vip_op_defaults() to set these up BEFORE we
+ are ever called. When CONST_TABLE is defined the table is instead fixed
+ at compile time: '$' is the immediate character, '*' the indirect
+ character and '`' the displacement-length character. */
+
+ #if defined(CONST_TABLE)
+ #define _ 0,
+ #define I VIP_IMMEDIATE,
+ #define S VIP_INDIRECT,
+ #define D VIP_DISPLEN,
+ static const char
+ vip_metacharacters[256] =
+ {
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /* ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N ^O*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /* ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
+ _ _ _ _ I _ _ _ _ _ S _ _ _ _ _ /* sp ! " # $ % & ' ( ) * + , - . / */
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*0 1 2 3 4 5 6 7 8 9 : ; < = > ?*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*@ A B C D E F G H I J K L M N O*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*P Q R S T U V W X Y Z [ \ ] ^ _*/
+ D _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*` a b c d e f g h i j k l m n o*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*p q r s t u v w x y z { | } ~ ^?*/
+
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ };
+ #undef _
+ #undef I
+ #undef S
+ #undef D
+
+#else
+
+ static char vip_metacharacters[256];
+
+ /* Set BIT in the vip_metacharacters[] entry of every character in the
+    '\0'-terminated string SYMS.  */
+ static void
+ vip_op_1 (int bit, const char *syms)
+ {
+   const unsigned char *cursor;
+
+   /* Walk the string as unsigned bytes so each one is a valid index.  */
+   for (cursor = (const unsigned char *) syms; *cursor != 0; cursor++)
+     vip_metacharacters[*cursor] |= bit;
+ }
+
+ /* Can be called any time. More arguments may appear in future.
+ Marks every character of IMMEDIATE, INDIRECT and DISPLEN in
+ vip_metacharacters[] with the matching VIP_* bit. Bits are only ever
+ OR-ed in, so a later call adds to, rather than replaces, earlier
+ settings. */
+ static void
+ vip_op_defaults (const char *immediate, const char *indirect, const char *displen)
+ {
+ vip_op_1 (VIP_IMMEDIATE, immediate);
+ vip_op_1 (VIP_INDIRECT, indirect);
+ vip_op_1 (VIP_DISPLEN, displen);
+ }
+
+#endif
+
+ /* One-time set-up; run this before decoding any line.
+    Creates op_hash and enters every votstrs entry into it, followed by
+    the 'synthetic' jXXX entries when SYNTHETIC_TOO is non-zero.
+    Insertion stops at the first failure.
+    Unless CONST_TABLE is defined, IMMEDIATE, INDIRECT and DISPLEN
+    nominate the metacharacters standing for eg DEC's "#", "@", "^".
+    Returns the error text of the failed insertion, or null.  */
+
+ static const char *
+ vip_begin (int synthetic_too, /* 1 means include jXXX op-codes. */
+ 	   const char *immediate,
+ 	   const char *indirect,
+ 	   const char *displen)
+ {
+   const char *errtxt = 0;	/* Error text; null while all is well.  */
+   const struct vot *entry;	/* Walks the opcode tables.  */
+
+   op_hash = hash_new ();
+
+   /* The real opcodes; the table ends at an entry with an empty name.  */
+   entry = votstrs;
+   while (*entry->vot_name && !errtxt)
+     {
+       errtxt = hash_insert (op_hash, entry->vot_name,
+ 			    (void *) &entry->vot_detail);
+       entry++;
+     }
+
+   /* The optional synthetic opcodes, skipped once an error is pending.  */
+   if (synthetic_too)
+     {
+       entry = synthetic_votstrs;
+       while (*entry->vot_name && !errtxt)
+ 	{
+ 	  errtxt = hash_insert (op_hash, entry->vot_name,
+ 				(void *) &entry->vot_detail);
+ 	  entry++;
+ 	}
+     }
+
+ #ifndef CONST_TABLE
+   vip_op_defaults (immediate, indirect, displen);
+ #endif
+
+   return errtxt;
+ }
+
+ /* Take up to 4 chars (an optional '%' prefix followed by a 2- or
+ 3-character name; unused trailing chars are `\0`)
+ and return the VAX register number that they represent.
+
+ Return -1 if they don't form a register name. Good names return
+ a number from 0:15 inclusive.
+
+ Case is not important in a name.
+
+ Register names understood are:
+
+ R0
+ R1
+ R2
+ R3
+ R4
+ R5
+ R6
+ R7
+ R8
+ R9
+ R10
+ R11
+ R12 AP
+ R13 FP
+ R14 SP
+ R15 PC */
+
+ #define AP 12 /* Alias for R12. */
+ #define FP 13 /* Alias for R13. */
+ #define SP 14 /* Alias for R14. */
+ #define PC 15 /* Alias for R15. */
+
+ /* Decode a register name such as '%r15' or 'ap', handed over as four
+    separate characters (unused trailing ones are '\0').  Letters are
+    matched without regard to case.  Gives back the register number
+    0..15, or -1 when the characters do not name a register.  */
+ static int
+ vax_reg_parse (char c1, char c2, char c3, char c4)
+ {
+ #ifdef OBJ_ELF
+   /* Register prefixes are mandatory for ELF; drop it once seen.  */
+   if (c1 != '%')
+     return -1;
+   c1 = c2;
+   c2 = c3;
+   c3 = c4;
+ #endif
+ #ifdef OBJ_VMS
+   /* Register prefixes are not allowed under VMS.  */
+   if (c4 != 0)
+     return -1;
+ #endif
+ #ifdef OBJ_AOUT
+   /* Register prefixes are optional under a.out.  */
+   if (c1 == '%')
+     {
+       c1 = c2;
+       c2 = c3;
+       c3 = c4;
+     }
+   else if (c3 && c4)
+     return -1;		/* Can't be 4 characters long.  */
+ #endif
+
+   c1 = TOLOWER (c1);
+   c2 = TOLOWER (c2);
+
+   /* Numbered registers: 'r' plus one or two decimal digits.  */
+   if (c1 == 'r' && ISDIGIT (c2))
+     {
+       int regno = c2 - '0';
+
+       if (!c3)
+ 	return regno;
+       if (!ISDIGIT (c3))
+ 	return -1;	/* c3 must be '\0' or a digit.  */
+
+       regno = regno * 10 + c3 - '0';
+       /* Only register numbers that fit in 1 hex digit exist.  */
+       return regno > 15 ? -1 : regno;
+     }
+
+   /* There are no three letter regs.  */
+   if (c3)
+     return -1;
+
+   if (c2 == 'p')
+     switch (c1)
+       {
+       case 'a':
+ 	return AP;
+       case 'f':
+ 	return FP;
+       case 's':
+ 	return SP;
+       default:
+ 	return -1;
+       }
+
+   if (c1 == 'p' && c2 == 'c')
+     return PC;
+
+   return -1;
+ }
+
+/* Parse a vax operand in DEC assembler notation.
+ For speed, expect a string of whitespace to be reduced to a single ' '.
+ This is the case for GNU AS, and is easy for other DEC-compatible
+ assemblers.
+
+ Knowledge about DEC VAX assembler operand notation lives here.
+ This doesn't even know what a register name is, except it believes
+ all register names are 2 or 3 characters, and lets vax_reg_parse() say
+ what number each name represents.
+ It does, however, know that PC, SP etc are special registers so it can
+ detect addressing modes that are silly for those registers.
+
+ Where possible, it delivers 1 fatal or 1 warning message if the operand
+ is suspect. Exactly what we test for is still evolving.
+
+ ---
+ Arg block.
+
+ There were a number of 'mismatched argument type' bugs to vip_op.
+ The most general solution is to typedef each (of many) arguments.
+ We used instead a typedef'd argument block. This is less modular
+ than using separate return pointers for each result, but runs faster
+ on most engines, and seems to keep programmers happy. It will have
+ to be done properly if we ever want to use vip_op as a general-purpose
+ module (it was designed to be).
+
+ G^
+
+ Doesn't support DEC "G^" format operands. These always take 5 bytes
+ to express, and code as modes 8F or 9F. Reason: "G^" deprives you of
+ optimising to (say) a "B^" if you are lucky in the way you link.
+ When someone builds a linker smart enough to convert "G^" to "B^", "W^"
+ whenever possible, then we should implement it.
+ If there is some other use for "G^", feel free to code it in!
+
+ speed
+
+ If I nested if()s more, I could avoid testing (*err) which would save
+ time, space and page faults. I didn't nest all those if()s for clarity
+ and because I think the mode testing can be re-arranged 1st to test the
+ commoner constructs 1st. Does anybody have statistics on this?
+
+ error messages
+
+ In future, we should be able to 'compose' error messages in a scratch area
+ and give the user MUCH more informative error messages. Although this takes
+ a little more code at run-time, it will make this module much more self-
+ documenting. As an example of what sucks now: most error messages have
+ hardwired into them the DEC VAX metacharacters "#^@" which are nothing like
+ the Un*x characters "$`*", that most users will expect from this AS.
+
+ ----
+
+ The input is a string, ending with '\0'.
+
+ We also require a 'hint' of what kind of operand is expected: so
+ we can remind caller not to write into literals for instance.
+
+ The output is a skeletal instruction.
+
+ The algorithm has two parts.
+ 1. extract the syntactic features (parse off all the @^#-()+[] mode crud);
+ 2. express the @^#-()+[] as some parameters suited to further analysis.
+
+ 2nd step is where we detect the googols of possible invalid combinations
+ a human (or compiler) might write. Note that if we do a half-way
+ decent assembler, we don't know how long to make (eg) displacement
+ fields when we first meet them (because they may not have defined values).
+ So we must wait until we know how many bits are needed for each address,
+ then we can know both length and opcodes of instructions.
+ For reason(s) above, we will pass to our caller a 'broken' instruction
+ of these major components, from which our caller can generate instructions:
+ - displacement length I^ S^ L^ B^ W^ unspecified
+ - mode (many)
+ - register R0-R15 or absent
+ - index register R0-R15 or absent
+ - expression text what we don't parse
+ - error text(s) why we couldn't understand the operand
+
+ ----
+
+ To decode output of this, test errtxt. If errtxt[0] == '\0', then
+ we had no errors that prevented parsing. Also, if we ever report
+ an internal bug, errtxt[0] is set non-zero. So one test tells you
+ if the other outputs are to be taken seriously.
+
+ ----
+
+ Dec defines the semantics of address modes (and values)
+ by a two-letter code, explained here.
+
+ letter 1: access type
+
+ a address calculation - no data access, registers forbidden
+ b branch displacement
+ m read - let go of bus - write back "modify"
+ r read
+ v bit field address: like 'a' but registers are OK
+ w write
+ space no operator (eg ".long foo") [our convention]
+
+ letter 2: data type (i.e. width, alignment)
+
+ b byte
+ d double precision floating point (D format)
+ f single precision floating point (F format)
+ g G format floating
+ h H format floating
+ l longword
+ o octaword
+ q quadword
+ w word
+ ? simple synthetic branch operand
+ - unconditional synthetic JSB/JSR operand
+ ! complex synthetic branch operand
+
+ The '-?!' letter 2's are not for external consumption. They are used
+ for various assemblers. Generally, all unknown widths are assumed 0.
+ We don't limit your choice of width character.
+
+ DEC operands are hard work to parse. For example, '@' as the first
+ character means indirect (deferred) mode but elsewhere it is a shift
+ operator.
+ The long-winded explanation of how this is supposed to work is
+ cancelled. Read a DEC vax manual.
+ We try hard not to parse anything that MIGHT be part of the expression
+ buried in that syntax. For example if we see @...(Rn) we don't check
+ for '-' before the '(' because mode @-(Rn) does not exist.
+
+ After parsing we have:
+
+ at 1 if leading '@' (or Un*x '*')
+ len takes one value from " bilsw". eg B^ -> 'b'.
+ hash 1 if leading '#' (or Un*x '$')
+ expr_begin, expr_end the expression we did not parse
+ even though we don't interpret it, we make use
+ of its presence or absence.
+ sign -1: -(Rn) 0: absent +1: (Rn)+
+ paren 1 if () are around register
+ reg major register number 0:15 -1 means absent
+ ndx index register number 0:15 -1 means absent
+
+ Again, I dare not explain it: just trace ALL the code!
+
+ Summary of vip_op outputs.
+
+ mode reg len ndx
+ (Rn) => @Rn
+ {@}Rn 5+@ n ' ' optional
+ branch operand 0 -1 ' ' -1
+ S^#foo 0 -1 's' -1
+ -(Rn) 7 n ' ' optional
+ {@}(Rn)+ 8+@ n ' ' optional
+ {@}#foo, no S^ 8+@ PC " i" optional
+ {@}{q^}{(Rn)} 10+@+q option " bwl" optional */
+
+ /* Dissect user-input 'optext' (which is something like "@B^foo@bar(AP)[FP]:")
+ using the vop in vopP. The caller supplies vopP's vop_access and vop_width;
+ we fill _ndx, _reg, _mode, _short, _warn, _error, _expr_begin, _expr_end
+ and _nbytes. */
+
+static void
+vip_op (char *optext, struct vop *vopP)
+{
+ /* Track operand text forward. */
+ char *p;
+ /* Track operand text backward. */
+ char *q;
+ /* 1 if leading '@' ('*') seen. */
+ int at;
+ /* one of " bilsw" */
+ char len;
+ /* 1 if leading '#' ('$') seen. */
+ int hash;
+ /* -1, 0 or +1. */
+ int sign = 0;
+ /* 1 if () surround register. */
+ int paren = 0;
+ /* Register number, -1:absent. */
+ int reg = 0;
+ /* Index register number -1:absent. */
+ int ndx = 0;
+ /* Report illegal operand, ""==OK. */
+ /* " " is a FAKE error: means we won. */
+ /* ANY err that begins with ' ' is a fake. */
+ /* " " is converted to "" before return. */
+ const char *err;
+ /* Warn about weird modes of address. */
+ const char *wrn;
+ /* Preserve q in case we backup. */
+ char *oldq = NULL;
+ /* Build up 4-bit operand mode here. */
+ /* Note: index mode is in ndx, this is. */
+ /* The major mode of operand address. */
+ int mode = 0;
+ /* Notice how we move wrong-arg-type bugs INSIDE this module: if we
+ get the types wrong below, we lose at compile time rather than at
+ lint or run time. */
+ char access_mode; /* vop_access. */
+
+ access_mode = vopP->vop_access;
+ /* None of our code bugs (yet), no user text errors, no warnings
+ even. */
+ err = wrn = 0;
+
+ p = optext;
+
+ if (*p == ' ') /* Expect all whitespace reduced to ' '. */
+ p++; /* skip over whitespace */
+
+ if ((at = INDIRECTP (*p)) != 0)
+ { /* 1 if *p=='@'(or '*' for Un*x) */
+ p++; /* at is determined */
+ if (*p == ' ') /* Expect all whitespace reduced to ' '. */
+ p++; /* skip over whitespace */
+ }
+
+ /* This code is subtle. It tries to detect all legal (letter)'^'
+ but it doesn't waste time explicitly testing for premature '\0' because
+ this case is rejected as a mismatch against either (letter) or '^'. */
+ {
+ char c;