+/* Operand width lookup, indexed by the data-type character of an operand.
+
+ In: 1 character, from "bdfghloqpw" being the data-type of an operand
+ of a vax instruction.
+
+ Out: the length of an operand of that type, in bytes.
+ Special branch operand types "-?!" have length 0.
+
+ The upper-case and lower-case letter rows hold the same widths, so the
+ lookup is case-insensitive. Every character without an explicit entry
+ yields 0; note that 'p' has no non-zero entry in this table.
+ The row layout assumes ASCII character codes. */
+
+static const short int vax_operand_width_size[256] =
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 8, 0, 4, 8, 16, 0, 0, 0, 4, 0, 0,16, /* 0x40-0x4f: ..B.D.FGH...L..O */
+ 0, 8, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x5f: .Q.....W........ */
+ 0, 0, 1, 0, 8, 0, 4, 8, 16, 0, 0, 0, 4, 0, 0,16, /* 0x60-0x6f: ..b.d.fgh...l..o */
+ 0, 8, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x7f: .q.....w........ */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+\f
+/* This perversion encodes all the vax opcodes as a bunch of strings.
+ RMS says we should build our hash-table at run-time. Hmm.
+ Please would someone arrange these in decreasing frequency of opcode?
+ Because of the way hash_...() works, the most frequently used opcode
+ should be textually first and so on.
+
+ Input for this table was 'vax.opcodes', awk(1)ed by 'vax.opcodes.c.awk' .
+ So change 'vax.opcodes', then re-generate this table. */
+
+#include "opcode/vax.h"
+\f
+/* This is a table of optional op-codes. All of them represent
+ 'synthetic' instructions that seem popular.
+
+ Here we make some pseudo op-codes. Every code has a bit set to say
+ it is synthetic. This lets you catch them if you want to
+ ban these opcodes. They are mnemonics for "elastic" instructions
+ that are supposed to assemble into the fewest bytes needed to do a
+ branch, or to do a conditional branch, or whatever.
+
+ The opcode is in the usual place [low-order n*8 bits]. This means
+ that if you mask off the bucky bits, the usual rules apply about
+ how long the opcode is.
+
+ All VAX branch displacements come at the end of the instruction.
+ For simple branches (1-byte opcode + 1-byte displacement) the last
+ operand is coded 'b?' where the "data type" '?' is a clue that we
+ may reverse the sense of the branch (complement lowest order bit)
+ and branch around a jump. This is by far the most common case.
+ That is why the VIT_OPCODE_SYNTHETIC bit is set: it says this is
+ a 0-byte op-code followed by 2 or more bytes of operand address.
+
+ If the op-code has VIT_OPCODE_SPECIAL set, then we have a more unusual
+ case.
+
+ For JBSB & JBR the treatment is similar, except (1) we have a 'bw'
+ option before (2) we can directly JSB/JMP because there is no condition.
+ These operands have 'b-' as their access/data type.
+
+ That leaves a bunch of random opcodes: JACBx, JxOBxxx. In these
+ cases, we do the same idea. JACBxxx are all marked with a 'b!'
+ JAOBxxx & JSOBxxx are marked with a 'b:'. */
+/* Compile-time guards. The opcode words written out literally in
+ synthetic_votstrs[] below hard-code the two flag bits (0x80000000 and
+ 0x40000000), so refuse to build if VIT_OPCODE_SYNTHETIC or
+ VIT_OPCODE_SPECIAL is ever given a different value. */
+#if (VIT_OPCODE_SYNTHETIC != 0x80000000)
+#error "You have just broken the encoding below, which assumes the sign bit means 'I am an imaginary instruction'."
+#endif
+
+#if (VIT_OPCODE_SPECIAL != 0x40000000)
+#error "You have just broken the encoding below, which assumes the 0x40 M bit means 'I am not to be "optimised" the way normal branches are'."
+#endif
+
+/* Table of the synthetic jXXX mnemonics.
+
+ Each entry is { name, { operand string, opcode word } }. The operand
+ string is a sequence of two-letter access/data-type codes, one pair per
+ operand. In the opcode word, a leading 0x8 means only the
+ VIT_OPCODE_SYNTHETIC bit (0x80000000) is set, while a leading 0xC means
+ VIT_OPCODE_SPECIAL (0x40000000) is set as well.
+
+ The table ends with an entry whose name is the empty string; vip_begin()
+ stops scanning when it reaches that sentinel. */
+static const struct vot
+ synthetic_votstrs[] =
+{
+ {"jbsb", {"b-", 0xC0000010}}, /* BSD 4.2 */
+/* jsb used already */
+ {"jbr", {"b-", 0xC0000011}}, /* BSD 4.2 */
+ {"jr", {"b-", 0xC0000011}}, /* consistent */
+ {"jneq", {"b?", 0x80000012}},
+ {"jnequ", {"b?", 0x80000012}},
+ {"jeql", {"b?", 0x80000013}},
+ {"jeqlu", {"b?", 0x80000013}},
+ {"jgtr", {"b?", 0x80000014}},
+ {"jleq", {"b?", 0x80000015}},
+/* un-used opcodes here */
+ {"jgeq", {"b?", 0x80000018}},
+ {"jlss", {"b?", 0x80000019}},
+ {"jgtru", {"b?", 0x8000001a}},
+ {"jlequ", {"b?", 0x8000001b}},
+ {"jvc", {"b?", 0x8000001c}},
+ {"jvs", {"b?", 0x8000001d}},
+ {"jgequ", {"b?", 0x8000001e}},
+ {"jcc", {"b?", 0x8000001e}},
+ {"jlssu", {"b?", 0x8000001f}},
+ {"jcs", {"b?", 0x8000001f}},
+
+ /* JACBx: three operands of the named data type, then a 'b!' branch. */
+ {"jacbw", {"rwrwmwb!", 0xC000003d}},
+ {"jacbf", {"rfrfmfb!", 0xC000004f}},
+ {"jacbd", {"rdrdmdb!", 0xC000006f}},
+ {"jacbb", {"rbrbmbb!", 0xC000009d}},
+ {"jacbl", {"rlrlmlb!", 0xC00000f1}},
+ {"jacbg", {"rgrgmgb!", 0xC0004ffd}},
+ {"jacbh", {"rhrhmhb!", 0xC0006ffd}},
+
+ /* Bit-test branches: operands followed by a simple 'b?' branch. */
+ {"jbs", {"rlvbb?", 0x800000e0}},
+ {"jbc", {"rlvbb?", 0x800000e1}},
+ {"jbss", {"rlvbb?", 0x800000e2}},
+ {"jbcs", {"rlvbb?", 0x800000e3}},
+ {"jbsc", {"rlvbb?", 0x800000e4}},
+ {"jbcc", {"rlvbb?", 0x800000e5}},
+ {"jbssi", {"rlvbb?", 0x800000e6}},
+ {"jbcci", {"rlvbb?", 0x800000e7}},
+ {"jlbs", {"rlb?", 0x800000e8}},
+ {"jlbc", {"rlb?", 0x800000e9}},
+
+ /* JAOBxxx / JSOBxxx: loop branches, marked with 'b:'. */
+ {"jaoblss", {"rlmlb:", 0xC00000f2}},
+ {"jaobleq", {"rlmlb:", 0xC00000f3}},
+ {"jsobgeq", {"mlb:", 0xC00000f4}},
+ {"jsobgtr", {"mlb:", 0xC00000f5}},
+
+/* CASEx has no branch addresses in our conception of it. */
+/* You should use ".word ..." statements after the "case ...". */
+
+ {"", {"", 0}} /* Empty is end sentinel. */
+};
+\f
+/* Because this module is useful for both VMS and UN*X style assemblers
+ and because of the variety of UN*X assemblers we must recognise
+ the different conventions for assembler operand notation. For example
+ VMS says "#42" for immediate mode, while most UN*X say "$42".
+ We permit arbitrary sets of (single) characters to represent the
+ 3 concepts that DEC writes '#', '@', '^'. */
+
+/* Character tests. Each VIP_* value is a distinct bit, so a single
+ vip_metacharacters[] entry can carry any combination of the roles. */
+#define VIP_IMMEDIATE 01 /* Character is like DEC # */
+#define VIP_INDIRECT 02 /* Char is like DEC @ */
+#define VIP_DISPLEN 04 /* Char is like DEC ^ */
+
+/* Each predicate is non-zero when character C has the given role.
+ C is masked with 0xff so the table index stays within 0..255 even
+ when C is a negative (signed) char value. */
+#define IMMEDIATEP(c) (vip_metacharacters [(c) & 0xff] & VIP_IMMEDIATE)
+#define INDIRECTP(c) (vip_metacharacters [(c) & 0xff] & VIP_INDIRECT)
+#define DISPLENP(c) (vip_metacharacters [(c) & 0xff] & VIP_DISPLEN)
+
+/* We assume 8 bits per byte. Use vip_op_defaults() to set these up BEFORE we
+ are ever called. */
+
+#if defined(CONST_TABLE)
+/* Compile-time variant of the metacharacter table.
+
+ The one-letter macros below each expand to a table entry INCLUDING its
+ trailing comma, which is why the rows contain no commas of their own.
+ They are #undef'd again right after the table.
+
+ As written, the table marks '$' as immediate, '*' as indirect and '`'
+ as the displacement-length character (the Un*x spellings); every other
+ character has no role. The row layout assumes ASCII character codes. */
+#define _ 0,
+#define I VIP_IMMEDIATE,
+#define S VIP_INDIRECT,
+#define D VIP_DISPLEN,
+static const char
+vip_metacharacters[256] =
+{
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /* ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N ^O*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /* ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
+ _ _ _ _ I _ _ _ _ _ S _ _ _ _ _ /* sp ! " # $ % & ' ( ) * + , - . / */
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*0 1 2 3 4 5 6 7 8 9 : ; < = > ?*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*@ A B C D E F G H I J K L M N O*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*P Q R S T U V W X Y Z [ \ ] ^ _*/
+ D _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*` a b c d e f g h i j k l m n o*/
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /*p q r s t u v w x y z { | } ~ ^?*/
+
+ /* Codes 0x80-0xff: no metacharacters. */
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+};
+#undef _
+#undef I
+#undef S
+#undef D
+
+#else
+
+/* Run-time variant of the metacharacter table, indexed by character code.
+ Having static storage it starts out all zero (no character has a role);
+ vip_op_1() ORs the VIP_* role bits into it. */
+static char vip_metacharacters[256];
+
+/* OR the role flag BIT into the vip_metacharacters[] entry of every
+   character in the NUL-terminated string SYMS.  Bits already present
+   in an entry are left untouched.  */
+static void
+vip_op_1 (int bit, const char *syms)
+{
+  for (; *syms != 0; syms++)
+    {
+      /* Index as unsigned so a negative char cannot reach before the
+         start of the table.  */
+      unsigned char slot = (unsigned char) *syms;
+
+      vip_metacharacters[slot] |= bit;
+    }
+}
+
+/* Register the metacharacter sets: every character of IMMEDIATE gains
+   the VIP_IMMEDIATE role, every character of INDIRECT the VIP_INDIRECT
+   role and every character of DISPLEN the VIP_DISPLEN role.
+   Can be called any time.  More arguments may appear in future.  */
+static void
+vip_op_defaults (const char *immediate, const char *indirect, const char *displen)
+{
+  const char *const role_chars[3] = { immediate, indirect, displen };
+  const int role_bits[3] = { VIP_IMMEDIATE, VIP_INDIRECT, VIP_DISPLEN };
+  unsigned int i;
+
+  for (i = 0; i < 3; i++)
+    vip_op_1 (role_bits[i], role_chars[i]);
+}
+
+#endif
+
+/* One-time set-up: call this before decoding any lines.
+
+   Creates the opcode hash table op_hash and enters every votstrs[] entry
+   into it, followed by the 'synthetic' jXXX entries when SYNTHETIC_TOO is
+   non-zero.  Scanning of either table ends at its empty-name sentinel, or
+   as soon as hash_insert() hands back a non-null error text.
+
+   Unless CONST_TABLE is defined, IMMEDIATE, INDIRECT and DISPLEN nominate
+   the metacharacters that stand for DEC's "#", "@" and "^".
+
+   Returns the error text, or null if every insertion succeeded.  */
+
+static const char *
+vip_begin (int synthetic_too,	/* Non-zero means include jXXX op-codes.  */
+	   const char *immediate,
+	   const char *indirect,
+	   const char *displen)
+{
+  const char *errtxt = 0;	/* First error reported by hash_insert.  */
+  const struct vot *entry;	/* Walks the opcode tables.  */
+
+  op_hash = hash_new ();
+
+  entry = votstrs;
+  while (*entry->vot_name && !errtxt)
+    {
+      errtxt = hash_insert (op_hash, entry->vot_name,
+			    (void *) &entry->vot_detail);
+      entry++;
+    }
+
+  if (synthetic_too)
+    {
+      entry = synthetic_votstrs;
+      while (*entry->vot_name && !errtxt)
+	{
+	  errtxt = hash_insert (op_hash, entry->vot_name,
+				(void *) &entry->vot_detail);
+	  entry++;
+	}
+    }
+
+#ifndef CONST_TABLE
+  vip_op_defaults (immediate, indirect, displen);
+#endif
+
+  return errtxt;
+}
+
+/* Take up to 4 chars (a '%' prefix, where the object format uses one,
+ plus a 2- or 3-character name; unused trailing chars are `\0`)
+ and return the VAX register number that they represent.
+
+ Return -1 if they don't form a register name. Good names return
+ a number from 0:15 inclusive.
+
+ Case is not important in a name.
+
+ Register names understood are:
+
+ R0
+ R1
+ R2
+ R3
+ R4
+ R5
+ R6
+ R7
+ R8
+ R9
+ R10
+ R11
+ R12 AP
+ R13 FP
+ R14 SP
+ R15 PC */
+
+#define AP 12
+#define FP 13
+#define SP 14
+#define PC 15
+
+/* Decode a register name such as '%r15' or 'ap', handed over as four
+   single characters (unused trailing positions are '\0').
+
+   Returns the register number 0..15, or -1 when the characters do not
+   form a recognised register name.  Letter case is ignored.  */
+static int
+vax_reg_parse (char c1, char c2, char c3, char c4)
+{
+#ifdef OBJ_ELF
+  /* Register prefixes are mandatory for ELF.  */
+  if (c1 != '%')
+    return -1;
+  c1 = c2;
+  c2 = c3;
+  c3 = c4;
+#endif
+#ifdef OBJ_VMS
+  /* Register prefixes are not allowed under VMS.  */
+  if (c4 != 0)
+    return -1;
+#endif
+#ifdef OBJ_AOUT
+  /* Register prefixes are optional under a.out.  */
+  if (c1 == '%')
+    {
+      c1 = c2;
+      c2 = c3;
+      c3 = c4;
+    }
+  else if (c3 && c4)
+    /* Without a prefix the name can't be 4 characters long.  */
+    return -1;
+#endif
+
+  c1 = TOLOWER (c1);
+  c2 = TOLOWER (c2);
+
+  /* Numbered form: 'r' followed by one or two decimal digits.  */
+  if (c1 == 'r' && ISDIGIT (c2))
+    {
+      int number = c2 - '0';
+
+      if (!c3)
+	return number;
+      if (!ISDIGIT (c3))
+	/* c3 must be '\0' or a digit.  */
+	return -1;
+
+      number = number * 10 + (c3 - '0');
+      /* Only registers 0..15 exist.  */
+      return number > 15 ? -1 : number;
+    }
+
+  /* Named form: there are no three letter regs.  */
+  if (c3)
+    return -1;
+
+  if (c2 == 'p')
+    {
+      if (c1 == 's')
+	return SP;
+      if (c1 == 'f')
+	return FP;
+      if (c1 == 'a')
+	return AP;
+      return -1;
+    }
+
+  if (c1 == 'p' && c2 == 'c')
+    return PC;
+
+  return -1;
+}
+
+/* Parse a vax operand in DEC assembler notation.
+ For speed, expect a string of whitespace to be reduced to a single ' '.
+ This is the case for GNU AS, and is easy for other DEC-compatible
+ assemblers.
+
+ Knowledge about DEC VAX assembler operand notation lives here.
+ This doesn't even know what a register name is, except it believes
+ all register names are 2 or 3 characters, and lets vax_reg_parse() say
+ what number each name represents.
+ It does, however, know that PC, SP etc are special registers so it can
+ detect addressing modes that are silly for those registers.
+
+ Where possible, it delivers 1 fatal or 1 warning message if the operand
+ is suspect. Exactly what we test for is still evolving.
+
+ ---
+ Arg block.
+
+ There were a number of 'mismatched argument type' bugs to vip_op.
+ The most general solution is to typedef each (of many) arguments.
+ We used instead a typedef'd argument block. This is less modular
+ than using separate return pointers for each result, but runs faster
+ on most engines, and seems to keep programmers happy. It will have
+ to be done properly if we ever want to use vip_op as a general-purpose
+ module (it was designed to be).
+
+ G^
+
+ Doesn't support DEC "G^" format operands. These always take 5 bytes
+ to express, and code as modes 8F or 9F. Reason: "G^" deprives you of
+ optimising to (say) a "B^" if you are lucky in the way you link.
+ When someone builds a linker smart enough to convert "G^" to "B^", "W^"
+ whenever possible, then we should implement it.
+ If there is some other use for "G^", feel free to code it in!
+
+ speed
+
+ If I nested if()s more, I could avoid testing (*err) which would save
+ time, space and page faults. I didn't nest all those if()s for clarity
+ and because I think the mode testing can be re-arranged 1st to test the
+ commoner constructs 1st. Does anybody have statistics on this?
+
+ error messages
+
+ In future, we should be able to 'compose' error messages in a scratch area
+ and give the user MUCH more informative error messages. Although this takes
+ a little more code at run-time, it will make this module much more self-
+ documenting. As an example of what sucks now: most error messages have
+ hardwired into them the DEC VAX metacharacters "#^@" which are nothing like
+ the Un*x characters "$`*", that most users will expect from this AS.
+
+ ----
+
+ The input is a string, ending with '\0'.
+
+ We also require a 'hint' of what kind of operand is expected: so
+ we can remind caller not to write into literals for instance.
+
+ The output is a skeletal instruction.
+
+ The algorithm has two parts.
+ 1. extract the syntactic features (parse off all the @^#-()+[] mode crud);
+ 2. express the @^#-()+[] as some parameters suited to further analysis.
+
+ 2nd step is where we detect the googols of possible invalid combinations
+ a human (or compiler) might write. Note that if we do a half-way
+ decent assembler, we don't know how long to make (eg) displacement
+ fields when we first meet them (because they may not have defined values).
+ So we must wait until we know how many bits are needed for each address,
+ then we can know both length and opcodes of instructions.
+ For reason(s) above, we will pass to our caller a 'broken' instruction
+ of these major components, from which our caller can generate instructions:
+ - displacement length I^ S^ L^ B^ W^ unspecified
+ - mode (many)
+ - register R0-R15 or absent
+ - index register R0-R15 or absent
+ - expression text what we don't parse
+ - error text(s) why we couldn't understand the operand
+
+ ----
+
+ To decode output of this, test errtxt. If errtxt[0] == '\0', then
+ we had no errors that prevented parsing. Also, if we ever report
+ an internal bug, errtxt[0] is set non-zero. So one test tells you
+ if the other outputs are to be taken seriously.
+
+ ----
+
+ Dec defines the semantics of address modes (and values)
+ by a two-letter code, explained here.
+
+ letter 1: access type
+
+ a address calculation - no data access, registers forbidden
+ b branch displacement
+ m read - let go of bus - write back "modify"
+ r read
+ v bit field address: like 'a' but registers are OK
+ w write
+ space no operator (eg ".long foo") [our convention]
+
+ letter 2: data type (i.e. width, alignment)
+
+ b byte
+ d double precision floating point (D format)
+ f single precision floating point (F format)
+ g G format floating
+ h H format floating
+ l longword
+ o octaword
+ q quadword
+ w word
+ ? simple synthetic branch operand
+ - unconditional synthetic JSB/JMP operand
+ ! complex synthetic branch operand (JACBxxx)
+ : synthetic JAOBxxx/JSOBxxx branch operand
+
+ The '-?!' letter 2's are not for external consumption. They are used
+ for various assemblers. Generally, all unknown widths are assumed 0.
+ We don't limit your choice of width character.
+
+ DEC operands are hard work to parse. For example, '@' as the first
+ character means indirect (deferred) mode but elsewhere it is a shift
+ operator.
+ The long-winded explanation of how this is supposed to work is
+ cancelled. Read a DEC vax manual.
+ We try hard not to parse anything that MIGHT be part of the expression
+ buried in that syntax. For example if we see @...(Rn) we don't check
+ for '-' before the '(' because mode @-(Rn) does not exist.
+
+ After parsing we have:
+
+ at 1 if leading '@' (or Un*x '*')
+ len takes one value from " bilsw". eg B^ -> 'b'.
+ hash 1 if leading '#' (or Un*x '$')
+ expr_begin, expr_end the expression we did not parse
+ even though we don't interpret it, we make use
+ of its presence or absence.
+ sign -1: -(Rn) 0: absent +1: (Rn)+
+ paren 1 if () are around register
+ reg major register number 0:15 -1 means absent
+ ndx index register number 0:15 -1 means absent
+
+ Again, I dare not explain it: just trace ALL the code!
+
+ Summary of vip_op outputs.
+
+ mode reg len ndx
+ (Rn) => @Rn
+ {@}Rn 5+@ n ' ' optional
+ branch operand 0 -1 ' ' -1
+ S^#foo 0 -1 's' -1
+ -(Rn) 7 n ' ' optional
+ {@}(Rn)+ 8+@ n ' ' optional
+ {@}#foo, no S^ 8+@ PC " i" optional
+ {@}{q^}{(Rn)} 10+@+q option " bwl" optional */
+
+/* Dissect user-input 'optext' (which is something like "@B^foo@bar(AP)[FP]:")
+ using the vop in vopP. The inputs are vopP's vop_access and vop_width.
+ We fill _ndx, _reg, _mode, _short, _warn, _error, _expr_begin, _expr_end
+ and _nbytes. */
+
+static void
+vip_op (char *optext, struct vop *vopP)
+{
+ /* Track operand text forward. */
+ char *p;
+ /* Track operand text backward. */
+ char *q;
+ /* 1 if leading '@' ('*') seen. */
+ int at;
+ /* one of " bilsw" */
+ char len;
+ /* 1 if leading '#' ('$') seen. */
+ int hash;
+ /* -1, 0 or +1. */
+ int sign = 0;
+ /* 1 if () surround register. */
+ int paren = 0;
+ /* Register number, -1:absent. */
+ int reg = 0;
+ /* Index register number -1:absent. */
+ int ndx = 0;
+ /* Report illegal operand, ""==OK. */
+ /* " " is a FAKE error: means we won. */
+ /* ANY err that begins with ' ' is a fake. */
+ /* " " is converted to "" before return. */
+ const char *err;
+ /* Warn about weird modes of address. */
+ const char *wrn;
+ /* Preserve q in case we backup. */
+ char *oldq = NULL;
+ /* Build up 4-bit operand mode here. */
+ /* Note: index mode is in ndx; this is */
+ /* the major mode of operand address. */
+ int mode = 0;
+ /* Notice how we move wrong-arg-type bugs INSIDE this module: if we
+ get the types wrong below, we lose at compile time rather than at
+ lint or run time. */
+ char access_mode; /* vop_access. */
+
+ access_mode = vopP->vop_access;
+ /* None of our code bugs (yet), no user text errors, no warnings
+ even. */
+ err = wrn = 0;
+
+ p = optext;
+
+ if (*p == ' ') /* Expect all whitespace reduced to ' '. */
+ p++; /* skip over whitespace */
+
+ if ((at = INDIRECTP (*p)) != 0)
+ { /* 1 if *p=='@'(or '*' for Un*x) */
+ p++; /* at is determined */
+ if (*p == ' ') /* Expect all whitespace reduced to ' '. */
+ p++; /* skip over whitespace */
+ }
+
+ /* This code is subtle. It tries to detect all legal (letter)'^'
+ but it doesn't waste time explicitly testing for premature '\0' because
+ this case is rejected as a mismatch against either (letter) or '^'. */
+ {
+ char c;
+
+ c = *p;
+ c = TOLOWER (c);
+ if (DISPLENP (p[1]) && strchr ("bilws", len = c))
+ p += 2; /* Skip (letter) '^'. */
+ else /* No (letter) '^' seen. */
+ len = ' '; /* Len is determined. */
+ }
+
+ if (*p == ' ') /* Expect all whitespace reduced to ' '. */
+ p++;
+
+ if ((hash = IMMEDIATEP (*p)) != 0) /* 1 if *p=='#' ('$' for Un*x) */
+ p++; /* Hash is determined. */
+
+ /* p points to what may be the beginning of an expression.
+ We have peeled off the front all that is peelable.
+ We know at, len, hash.
+
+ Lets point q at the end of the text and parse that (backwards). */
+
+ for (q = p; *q; q++)
+ ;
+ q--; /* Now q points at last char of text. */
+
+ if (*q == ' ' && q >= p) /* Expect all whitespace reduced to ' '. */
+ q--;
+
+ /* Reverse over whitespace, but don't */
+ /* run back over *p. */
+
+ /* As a matter of policy here, we look for [Rn], although both Rn and S^#
+ forbid [Rn]. This is because it is easy, and because only a sick
+ cyborg would have [...] trailing an expression in a VAX-like assembler.
+ A meticulous parser would first check for Rn followed by '(' or '['
+ and not parse a trailing ']' if it found another. We just ban expressions
+ ending in ']'. */
+ if (*q == ']')
+ {
+ while (q >= p && *q != '[')
+ q--;
+ /* Either q<p or we got matching '['. */
+ if (q < p)
+ err = _("no '[' to match ']'");
+ else
+ {
+ /* Confusers like "[]" will eventually lose with a bad register
+ * name error. So again we don't need to check for early '\0'. */
+ if (q[3] == ']')
+ ndx = vax_reg_parse (q[1], q[2], 0, 0);
+ else if (q[4] == ']')
+ ndx = vax_reg_parse (q[1], q[2], q[3], 0);
+ else if (q[5] == ']')
+ ndx = vax_reg_parse (q[1], q[2], q[3], q[4]);
+ else
+ ndx = -1;
+ /* Since we saw a ']' we will demand a register name in the [].
+ * If luser hasn't given us one: be rude. */
+ if (ndx < 0)
+ err = _("bad register in []");
+ else if (ndx == PC)
+ err = _("[PC] index banned");
+ else
+ /* Point q just before "[...]". */
+ q--;
+ }
+ }
+ else
+ /* No ']', so no iNDeX register. */
+ ndx = -1;
+
+ /* If err = "..." then we lost: run away.
+ Otherwise ndx == -1 if there was no "[...]".
+ Otherwise, ndx is index register number, and q points before "[...]". */
+
+ if (*q == ' ' && q >= p) /* Expect all whitespace reduced to ' '. */
+ q--;
+ /* Reverse over whitespace, but don't */
+ /* run back over *p. */
+ if (!err || !*err)
+ {
+ /* no ()+ or -() seen yet */
+ sign = 0;
+
+ if (q > p + 3 && *q == '+' && q[-1] == ')')
+ {
+ sign = 1; /* we saw a ")+" */
+ q--; /* q points to ')' */
+ }
+
+ if (*q == ')' && q > p + 2)
+ {
+ paren = 1; /* assume we have "(...)" */
+ while (q >= p && *q != '(')
+ q--;
+ /* either q<p or we got matching '(' */
+ if (q < p)
+ err = _("no '(' to match ')'");
+ else
+ {
+ /* Confusers like "()" will eventually lose with a bad register
+ name error. So again we don't need to check for early '\0'. */
+ if (q[3] == ')')
+ reg = vax_reg_parse (q[1], q[2], 0, 0);
+ else if (q[4] == ')')
+ reg = vax_reg_parse (q[1], q[2], q[3], 0);
+ else if (q[5] == ')')
+ reg = vax_reg_parse (q[1], q[2], q[3], q[4]);