gas/
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
252b5132 1/* This is the Assembler Pre-Processor
f7e42eb4 2 Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
2da5c037 3 1999, 2000, 2001, 2002, 2003
252b5132
RH
4 Free Software Foundation, Inc.
5
6 This file is part of GAS, the GNU Assembler.
7
8 GAS is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GAS is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GAS; see the file COPYING. If not, write to the Free
4b4da160
NC
20 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
252b5132 22
204cd129 23/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
252b5132
RH
24/* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
27 pair. This needs better error-handling. */
28
29#include <stdio.h>
8d9cd6b1 30#include "as.h" /* For BAD_CASE() only. */
252b5132
RH
31
/* Pre-ANSI compilers do not understand `const'; define it away unless
   the build has already supplied a definition.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Targets other than m68k never scrub in MRI mode; making this a
   constant lets the compiler discard the MRI-only branches.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Progress through SYMVER_PSEUDO while matching it against the input;
   NULL when no match is in progress.  */
static const char * symver_state;
#endif
57
/* Character classification table, indexed by character code.  Each
   entry is zero (no special meaning) or one of the LEX_IS_* values
   below.  It is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters which are always symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  Note that the value 7 is not
   used by any class.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14

/* Classification predicates.  C is used directly as an index into
   lex[], so it must be in the range 0..255; in particular callers must
   test for EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
89
73ee5e4c 90static int process_escape (int);
252b5132
RH
91
92/* FIXME-soon: The entire lexer/parser thingy should be
93 built statically at compile time rather than dynamically
3ee4defc 94 each and every time the assembler is run. xoxorich. */
252b5132 95
3ee4defc 96void
73ee5e4c 97do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
252b5132
RH
98{
99 const char *p;
100 int c;
101
252b5132
RH
102 lex[' '] = LEX_IS_WHITESPACE;
103 lex['\t'] = LEX_IS_WHITESPACE;
104 lex['\r'] = LEX_IS_WHITESPACE;
105 lex['\n'] = LEX_IS_NEWLINE;
252b5132
RH
106 lex[':'] = LEX_IS_COLON;
107
abd63a32
AM
108#ifdef TC_M68K
109 scrub_m68k_mri = m68k_mri;
110
252b5132 111 if (! m68k_mri)
abd63a32 112#endif
252b5132
RH
113 {
114 lex['"'] = LEX_IS_STRINGQUOTE;
115
5b93d8bb 116#if ! defined (TC_HPPA) && ! defined (TC_I370)
204cd129 117 /* I370 uses single-quotes to delimit integer, float constants. */
252b5132
RH
118 lex['\''] = LEX_IS_ONECHAR_QUOTE;
119#endif
120
121#ifdef SINGLE_QUOTE_STRINGS
122 lex['\''] = LEX_IS_STRINGQUOTE;
123#endif
124 }
125
126 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
127 in state 5 of do_scrub_chars must be changed. */
128
129 /* Note that these override the previous defaults, e.g. if ';' is a
130 comment char, then it isn't a line separator. */
131 for (p = symbol_chars; *p; ++p)
204cd129 132 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
252b5132
RH
133
134 for (c = 128; c < 256; ++c)
135 lex[c] = LEX_IS_SYMBOL_COMPONENT;
136
137#ifdef tc_symbol_chars
138 /* This macro permits the processor to specify all characters which
139 may appears in an operand. This will prevent the scrubber from
140 discarding meaningful whitespace in certain cases. The i386
141 backend uses this to support prefixes, which can confuse the
142 scrubber as to whether it is parsing operands or opcodes. */
143 for (p = tc_symbol_chars; *p; ++p)
144 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
145#endif
146
147 /* The m68k backend wants to be able to change comment_chars. */
148#ifndef tc_comment_chars
149#define tc_comment_chars comment_chars
150#endif
151 for (p = tc_comment_chars; *p; p++)
204cd129 152 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
252b5132
RH
153
154 for (p = line_comment_chars; *p; p++)
204cd129 155 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
252b5132
RH
156
157 for (p = line_separator_chars; *p; p++)
204cd129 158 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
252b5132 159
62f65a7b
DB
160#ifdef tc_parallel_separator_chars
161 /* This macro permits the processor to specify all characters which
162 separate parallel insns on the same line. */
163 for (p = tc_parallel_separator_chars; *p; p++)
204cd129 164 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
62f65a7b
DB
165#endif
166
252b5132
RH
167 /* Only allow slash-star comments if slash is not in use.
168 FIXME: This isn't right. We should always permit them. */
169 if (lex['/'] == 0)
204cd129 170 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
252b5132 171
abd63a32 172#ifdef TC_M68K
252b5132
RH
173 if (m68k_mri)
174 {
175 lex['\''] = LEX_IS_STRINGQUOTE;
176 lex[';'] = LEX_IS_COMMENT_START;
177 lex['*'] = LEX_IS_LINE_COMMENT_START;
178 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
b1ac4c66 179 then it can't be used in an expression. */
252b5132
RH
180 lex['!'] = LEX_IS_LINE_COMMENT_START;
181 }
abd63a32 182#endif
252b5132
RH
183
184#ifdef TC_V850
185 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
186#endif
f28e8eb3 187#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
188 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
189#endif
190#ifdef TC_D30V
204cd129 191 /* Must do this is we want VLIW instruction with "->" or "<-". */
252b5132
RH
192 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
193#endif
204cd129 194}
252b5132 195
/* Saved state of the scrubber.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once the current sub-state has finished.  */
static int old_state;
/* Pending literal text still to be copied to the output.  */
static char *out_string;
/* Scratch buffer that OUT_STRING may point into.  */
static char out_buf[20];
/* Count of newlines swallowed so far which still have to be emitted.  */
static int add_newlines;
/* Unconsumed input left over from the previous call, or NULL.  */
static char *saved_input;
/* Number of bytes at SAVED_INPUT; only meaningful when it is not NULL.  */
static int saved_input_len;
/* Buffer into which raw input is read.  */
static char input_buffer[32 * 1024];
/* Progress through the `.mri' pseudo-op recognizer, or NULL.  */
static const char *mri_state;
/* Last character matched by the `.mri' recognizer.  */
static char mri_last_ch;
207
208/* Data structure for saving the state of app across #include's. Note that
209 app is called asynchronously to the parsing of the .include's, so our
210 state at the time .include is interpreted is completely unrelated.
211 That's why we have to save it all. */
212
204cd129
NC
213struct app_save
214{
30a2b4ef
KH
215 int state;
216 int old_state;
217 char * out_string;
218 char out_buf[sizeof (out_buf)];
219 int add_newlines;
220 char * saved_input;
221 int saved_input_len;
abd63a32 222#ifdef TC_M68K
30a2b4ef 223 int scrub_m68k_mri;
abd63a32 224#endif
30a2b4ef
KH
225 const char * mri_state;
226 char mri_last_ch;
252b5132 227#if defined TC_ARM && defined OBJ_ELF
30a2b4ef 228 const char * symver_state;
252b5132 229#endif
30a2b4ef 230};
252b5132
RH
231
232char *
73ee5e4c 233app_push (void)
252b5132
RH
234{
235 register struct app_save *saved;
236
237 saved = (struct app_save *) xmalloc (sizeof (*saved));
238 saved->state = state;
239 saved->old_state = old_state;
240 saved->out_string = out_string;
241 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
242 saved->add_newlines = add_newlines;
2b47531b
ILT
243 if (saved_input == NULL)
244 saved->saved_input = NULL;
245 else
246 {
247 saved->saved_input = xmalloc (saved_input_len);
248 memcpy (saved->saved_input, saved_input, saved_input_len);
249 saved->saved_input_len = saved_input_len;
250 }
abd63a32 251#ifdef TC_M68K
252b5132 252 saved->scrub_m68k_mri = scrub_m68k_mri;
abd63a32 253#endif
252b5132
RH
254 saved->mri_state = mri_state;
255 saved->mri_last_ch = mri_last_ch;
256#if defined TC_ARM && defined OBJ_ELF
257 saved->symver_state = symver_state;
258#endif
259
3ee4defc 260 /* do_scrub_begin() is not useful, just wastes time. */
252b5132
RH
261
262 state = 0;
263 saved_input = NULL;
264
265 return (char *) saved;
266}
267
3ee4defc 268void
73ee5e4c 269app_pop (char *arg)
252b5132
RH
270{
271 register struct app_save *saved = (struct app_save *) arg;
272
3ee4defc 273 /* There is no do_scrub_end (). */
252b5132
RH
274 state = saved->state;
275 old_state = saved->old_state;
276 out_string = saved->out_string;
277 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
278 add_newlines = saved->add_newlines;
2b47531b
ILT
279 if (saved->saved_input == NULL)
280 saved_input = NULL;
281 else
282 {
ab9da554 283 assert (saved->saved_input_len <= (int) (sizeof input_buffer));
2b47531b
ILT
284 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
285 saved_input = input_buffer;
286 saved_input_len = saved->saved_input_len;
287 free (saved->saved_input);
288 }
abd63a32 289#ifdef TC_M68K
252b5132 290 scrub_m68k_mri = saved->scrub_m68k_mri;
abd63a32 291#endif
252b5132
RH
292 mri_state = saved->mri_state;
293 mri_last_ch = saved->mri_last_ch;
294#if defined TC_ARM && defined OBJ_ELF
295 symver_state = saved->symver_state;
296#endif
297
298 free (arg);
204cd129 299}
252b5132
RH
300
/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

/* Translate the character CH following a backslash into the character
   which the escape sequence denotes.  The letters b, f, n, r and t map
   to the corresponding control characters; any other character,
   including both quote characters, is returned unchanged.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
327
328/* This function is called to process input characters. The GET
329 parameter is used to retrieve more input characters. GET should
330 set its parameter to point to a buffer, and return the length of
331 the buffer; it should return 0 at end of file. The scrubbed output
332 characters are put into the buffer starting at TOSTART; the TOSTART
333 buffer is TOLEN bytes in length. The function returns the number
334 of scrubbed characters put into TOSTART. This will be TOLEN unless
335 end of file was seen. This function is arranged as a state
336 machine, and saves its state so that it may return at any point.
337 This is the way the old code used to work. */
338
339int
73ee5e4c 340do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
252b5132
RH
341{
342 char *to = tostart;
343 char *toend = tostart + tolen;
344 char *from;
345 char *fromend;
346 int fromlen;
347 register int ch, ch2 = 0;
c9c5dcda
AM
348 /* Character that started the string we're working on. */
349 static char quotechar;
252b5132
RH
350
351 /*State 0: beginning of normal line
352 1: After first whitespace on line (flush more white)
353 2: After first non-white (opcode) on line (keep 1white)
354 3: after second white on line (into operands) (flush white)
355 4: after putting out a .line, put out digits
356 5: parsing a string, then go to old-state
357 6: putting out \ escape in a "d string.
358 7: After putting out a .appfile, put out string.
359 8: After putting out a .appfile string, flush until newline.
360 9: After seeing symbol char in state 3 (keep 1white after symchar)
361 10: After seeing whitespace in state 9 (keep white before symchar)
362 11: After seeing a symbol character in state 0 (eg a label definition)
363 -1: output string in out_string and go to the state in old_state
364 -2: flush text until a '*' '/' is seen, then go to state old_state
365#ifdef TC_V850
b1ac4c66
AM
366 12: After seeing a dash, looking for a second dash as a start
367 of comment.
252b5132 368#endif
f28e8eb3 369#ifdef DOUBLEBAR_PARALLEL
b1ac4c66
AM
370 13: After seeing a vertical bar, looking for a second
371 vertical bar as a parallel expression separator.
52628315
L
372#endif
373#ifdef TC_IA64
374 14: After seeing a `(' at state 0, looking for a `)' as
375 predicate.
376 15: After seeing a `(' at state 1, looking for a `)' as
377 predicate.
252b5132
RH
378#endif
379 */
380
381 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
382 constructs like ``.loc 1 20''. This was turning into ``.loc
383 120''. States 9 and 10 ensure that a space is never dropped in
3b37fd66 384 between characters which could appear in an identifier. Ian
252b5132
RH
385 Taylor, ian@cygnus.com.
386
387 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
388 correctly on the PA (and any other target where colons are optional).
389 Jeff Law, law@cs.utah.edu.
390
391 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
392 get squashed into "cmp r1,r2||trap#1", with the all important space
393 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
394
395 /* This macro gets the next input character. */
396
2b47531b
ILT
397#define GET() \
398 (from < fromend \
399 ? * (unsigned char *) (from++) \
400 : (saved_input = NULL, \
401 fromlen = (*get) (input_buffer, sizeof input_buffer), \
402 from = input_buffer, \
403 fromend = from + fromlen, \
404 (fromlen == 0 \
405 ? EOF \
252b5132
RH
406 : * (unsigned char *) (from++))))
407
408 /* This macro pushes a character back on the input stream. */
409
410#define UNGET(uch) (*--from = (uch))
411
412 /* This macro puts a character into the output buffer. If this
413 character fills the output buffer, this macro jumps to the label
414 TOFULL. We use this rather ugly approach because we need to
415 handle two different termination conditions: EOF on the input
416 stream, and a full output buffer. It would be simpler if we
417 always read in the entire input stream before processing it, but
418 I don't want to make such a significant change to the assembler's
419 memory usage. */
420
411863a4
KH
421#define PUT(pch) \
422 do \
423 { \
424 *to++ = (pch); \
425 if (to >= toend) \
426 goto tofull; \
427 } \
252b5132
RH
428 while (0)
429
430 if (saved_input != NULL)
431 {
432 from = saved_input;
433 fromend = from + saved_input_len;
434 }
435 else
436 {
2b47531b 437 fromlen = (*get) (input_buffer, sizeof input_buffer);
252b5132
RH
438 if (fromlen == 0)
439 return 0;
2b47531b 440 from = input_buffer;
252b5132
RH
441 fromend = from + fromlen;
442 }
443
444 while (1)
445 {
446 /* The cases in this switch end with continue, in order to
b1ac4c66
AM
447 branch back to the top of this while loop and generate the
448 next output character in the appropriate state. */
252b5132
RH
449 switch (state)
450 {
451 case -1:
452 ch = *out_string++;
453 if (*out_string == '\0')
454 {
455 state = old_state;
456 old_state = 3;
457 }
458 PUT (ch);
459 continue;
460
461 case -2:
462 for (;;)
463 {
464 do
465 {
466 ch = GET ();
467
468 if (ch == EOF)
469 {
470 as_warn (_("end of file in comment"));
471 goto fromeof;
472 }
473
474 if (ch == '\n')
475 PUT ('\n');
476 }
477 while (ch != '*');
478
479 while ((ch = GET ()) == '*')
480 ;
481
482 if (ch == EOF)
483 {
484 as_warn (_("end of file in comment"));
485 goto fromeof;
486 }
487
488 if (ch == '/')
489 break;
490
491 UNGET (ch);
492 }
493
494 state = old_state;
495 UNGET (' ');
496 continue;
497
498 case 4:
499 ch = GET ();
500 if (ch == EOF)
501 goto fromeof;
502 else if (ch >= '0' && ch <= '9')
503 PUT (ch);
504 else
505 {
506 while (ch != EOF && IS_WHITESPACE (ch))
507 ch = GET ();
508 if (ch == '"')
509 {
510 UNGET (ch);
511 if (scrub_m68k_mri)
512 out_string = "\n\tappfile ";
513 else
514 out_string = "\n\t.appfile ";
515 old_state = 7;
516 state = -1;
517 PUT (*out_string++);
518 }
519 else
520 {
521 while (ch != EOF && ch != '\n')
522 ch = GET ();
523 state = 0;
524 PUT (ch);
525 }
526 }
527 continue;
528
529 case 5:
530 /* We are going to copy everything up to a quote character,
b1ac4c66
AM
531 with special handling for a backslash. We try to
532 optimize the copying in the simple case without using the
533 GET and PUT macros. */
252b5132
RH
534 {
535 char *s;
536 int len;
537
538 for (s = from; s < fromend; s++)
539 {
540 ch = *s;
252b5132 541 if (ch == '\\'
c9c5dcda 542 || ch == quotechar
252b5132
RH
543 || ch == '\n')
544 break;
545 }
546 len = s - from;
547 if (len > toend - to)
548 len = toend - to;
549 if (len > 0)
550 {
551 memcpy (to, from, len);
552 to += len;
553 from += len;
554 }
555 }
556
557 ch = GET ();
558 if (ch == EOF)
559 {
c9c5dcda 560 as_warn (_("end of file in string; '%c' inserted"), quotechar);
252b5132
RH
561 state = old_state;
562 UNGET ('\n');
c9c5dcda 563 PUT (quotechar);
252b5132 564 }
c9c5dcda 565 else if (ch == quotechar)
252b5132
RH
566 {
567 state = old_state;
568 PUT (ch);
569 }
570#ifndef NO_STRING_ESCAPES
571 else if (ch == '\\')
572 {
573 state = 6;
574 PUT (ch);
575 }
576#endif
577 else if (scrub_m68k_mri && ch == '\n')
578 {
579 /* Just quietly terminate the string. This permits lines like
204cd129 580 bne label loop if we haven't reach end yet. */
252b5132
RH
581 state = old_state;
582 UNGET (ch);
583 PUT ('\'');
584 }
585 else
586 {
587 PUT (ch);
588 }
589 continue;
590
591 case 6:
592 state = 5;
593 ch = GET ();
594 switch (ch)
595 {
596 /* Handle strings broken across lines, by turning '\n' into
597 '\\' and 'n'. */
598 case '\n':
599 UNGET ('n');
600 add_newlines++;
601 PUT ('\\');
602 continue;
603
4252e537 604 case EOF:
c9c5dcda
AM
605 as_warn (_("end of file in string; '%c' inserted"), quotechar);
606 PUT (quotechar);
4252e537
AM
607 continue;
608
252b5132
RH
609 case '"':
610 case '\\':
611 case 'b':
612 case 'f':
613 case 'n':
614 case 'r':
615 case 't':
616 case 'v':
617 case 'x':
618 case 'X':
619 case '0':
620 case '1':
621 case '2':
622 case '3':
623 case '4':
624 case '5':
625 case '6':
626 case '7':
627 break;
4252e537 628
252b5132 629 default:
4252e537 630#ifdef ONLY_STANDARD_ESCAPES
0e389e77 631 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
4252e537 632#endif
252b5132 633 break;
252b5132
RH
634 }
635 PUT (ch);
636 continue;
637
638 case 7:
639 ch = GET ();
c9c5dcda 640 quotechar = ch;
252b5132
RH
641 state = 5;
642 old_state = 8;
252b5132
RH
643 PUT (ch);
644 continue;
645
646 case 8:
647 do
648 ch = GET ();
649 while (ch != '\n' && ch != EOF);
650 if (ch == EOF)
651 goto fromeof;
652 state = 0;
653 PUT (ch);
654 continue;
b1ac4c66
AM
655
656#ifdef DOUBLEBAR_PARALLEL
657 case 13:
658 ch = GET ();
659 if (ch != '|')
660 abort ();
661
662 /* Reset back to state 1 and pretend that we are parsing a
663 line from just after the first white space. */
664 state = 1;
665 PUT ('|');
666 continue;
667#endif
252b5132
RH
668 }
669
204cd129 670 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
252b5132
RH
671
672 /* flushchar: */
673 ch = GET ();
674
52628315
L
675#ifdef TC_IA64
676 if (ch == '(' && (state == 0 || state == 1))
677 {
678 state += 14;
679 PUT (ch);
680 continue;
681 }
682 else if (state == 14 || state == 15)
683 {
684 if (ch == ')')
70b911ad
JJ
685 {
686 state -= 14;
687 PUT (ch);
688 ch = GET ();
689 }
52628315
L
690 else
691 {
692 PUT (ch);
693 continue;
694 }
695 }
696#endif
697
252b5132
RH
698 recycle:
699
700#if defined TC_ARM && defined OBJ_ELF
701 /* We need to watch out for .symver directives. See the comment later
702 in this function. */
703 if (symver_state == NULL)
704 {
705 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
706 symver_state = symver_pseudo + 1;
707 }
708 else
709 {
710 /* We advance to the next state if we find the right
711 character. */
712 if (ch != '\0' && (*symver_state == ch))
713 ++symver_state;
714 else if (*symver_state != '\0')
715 /* We did not get the expected character, or we didn't
716 get a valid terminating character after seeing the
717 entire pseudo-op, so we must go back to the beginning. */
718 symver_state = NULL;
719 else
720 {
721 /* We've read the entire pseudo-op. If this is the end
722 of the line, go back to the beginning. */
723 if (IS_NEWLINE (ch))
724 symver_state = NULL;
725 }
726 }
727#endif /* TC_ARM && OBJ_ELF */
728
729#ifdef TC_M68K
730 /* We want to have pseudo-ops which control whether we are in
b1ac4c66
AM
731 MRI mode or not. Unfortunately, since m68k MRI mode affects
732 the scrubber, that means that we need a special purpose
733 recognizer here. */
252b5132
RH
734 if (mri_state == NULL)
735 {
736 if ((state == 0 || state == 1)
737 && ch == mri_pseudo[0])
738 mri_state = mri_pseudo + 1;
739 }
740 else
741 {
742 /* We advance to the next state if we find the right
743 character, or if we need a space character and we get any
744 whitespace character, or if we need a '0' and we get a
745 '1' (this is so that we only need one state to handle
746 ``.mri 0'' and ``.mri 1''). */
747 if (ch != '\0'
748 && (*mri_state == ch
749 || (*mri_state == ' '
750 && lex[ch] == LEX_IS_WHITESPACE)
751 || (*mri_state == '0'
752 && ch == '1')))
753 {
754 mri_last_ch = ch;
755 ++mri_state;
756 }
757 else if (*mri_state != '\0'
758 || (lex[ch] != LEX_IS_WHITESPACE
759 && lex[ch] != LEX_IS_NEWLINE))
760 {
761 /* We did not get the expected character, or we didn't
762 get a valid terminating character after seeing the
763 entire pseudo-op, so we must go back to the
764 beginning. */
765 mri_state = NULL;
766 }
767 else
768 {
769 /* We've read the entire pseudo-op. mips_last_ch is
b1ac4c66
AM
770 either '0' or '1' indicating whether to enter or
771 leave MRI mode. */
252b5132
RH
772 do_scrub_begin (mri_last_ch == '1');
773 mri_state = NULL;
774
775 /* We continue handling the character as usual. The
b1ac4c66
AM
776 main gas reader must also handle the .mri pseudo-op
777 to control expression parsing and the like. */
252b5132
RH
778 }
779 }
780#endif
781
782 if (ch == EOF)
783 {
784 if (state != 0)
785 {
786 as_warn (_("end of file not at end of a line; newline inserted"));
787 state = 0;
788 PUT ('\n');
789 }
790 goto fromeof;
791 }
792
793 switch (lex[ch])
794 {
795 case LEX_IS_WHITESPACE:
796 do
797 {
798 ch = GET ();
799 }
800 while (ch != EOF && IS_WHITESPACE (ch));
801 if (ch == EOF)
802 goto fromeof;
803
804 if (state == 0)
805 {
806 /* Preserve a single whitespace character at the
807 beginning of a line. */
808 state = 1;
809 UNGET (ch);
810 PUT (' ');
811 break;
812 }
813
f28e8eb3 814#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef
KH
815 if (lex[ch] == LEX_IS_COLON)
816 {
817 /* Only keep this white if there's no white *after* the
b1ac4c66 818 colon. */
30a2b4ef
KH
819 ch2 = GET ();
820 UNGET (ch2);
821 if (!IS_WHITESPACE (ch2))
822 {
823 state = 9;
824 UNGET (ch);
825 PUT (' ');
826 break;
827 }
828 }
f28e8eb3 829#endif
252b5132
RH
830 if (IS_COMMENT (ch)
831 || ch == '/'
62f65a7b
DB
832 || IS_LINE_SEPARATOR (ch)
833 || IS_PARALLEL_SEPARATOR (ch))
252b5132
RH
834 {
835 if (scrub_m68k_mri)
836 {
837 /* In MRI mode, we keep these spaces. */
838 UNGET (ch);
839 PUT (' ');
840 break;
841 }
842 goto recycle;
843 }
844
845 /* If we're in state 2 or 11, we've seen a non-white
846 character followed by whitespace. If the next character
847 is ':', this is whitespace after a label name which we
848 normally must ignore. In MRI mode, though, spaces are
849 not permitted between the label and the colon. */
850 if ((state == 2 || state == 11)
851 && lex[ch] == LEX_IS_COLON
852 && ! scrub_m68k_mri)
853 {
854 state = 1;
855 PUT (ch);
856 break;
857 }
858
859 switch (state)
860 {
861 case 0:
862 state++;
863 goto recycle; /* Punted leading sp */
864 case 1:
865 /* We can arrive here if we leave a leading whitespace
866 character at the beginning of a line. */
867 goto recycle;
868 case 2:
869 state = 3;
870 if (to + 1 < toend)
871 {
872 /* Optimize common case by skipping UNGET/GET. */
873 PUT (' '); /* Sp after opco */
874 goto recycle;
875 }
876 UNGET (ch);
877 PUT (' ');
878 break;
879 case 3:
880 if (scrub_m68k_mri)
881 {
882 /* In MRI mode, we keep these spaces. */
883 UNGET (ch);
884 PUT (' ');
885 break;
886 }
887 goto recycle; /* Sp in operands */
888 case 9:
889 case 10:
890 if (scrub_m68k_mri)
891 {
892 /* In MRI mode, we keep these spaces. */
893 state = 3;
894 UNGET (ch);
895 PUT (' ');
896 break;
897 }
898 state = 10; /* Sp after symbol char */
899 goto recycle;
900 case 11:
abd63a32 901 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
252b5132
RH
902 state = 1;
903 else
904 {
905 /* We know that ch is not ':', since we tested that
b1ac4c66
AM
906 case above. Therefore this is not a label, so it
907 must be the opcode, and we've just seen the
908 whitespace after it. */
252b5132
RH
909 state = 3;
910 }
911 UNGET (ch);
912 PUT (' '); /* Sp after label definition. */
913 break;
914 default:
915 BAD_CASE (state);
916 }
917 break;
918
919 case LEX_IS_TWOCHAR_COMMENT_1ST:
920 ch2 = GET ();
921 if (ch2 == '*')
922 {
923 for (;;)
924 {
925 do
926 {
927 ch2 = GET ();
928 if (ch2 != EOF && IS_NEWLINE (ch2))
929 add_newlines++;
930 }
931 while (ch2 != EOF && ch2 != '*');
932
933 while (ch2 == '*')
934 ch2 = GET ();
935
936 if (ch2 == EOF || ch2 == '/')
937 break;
938
939 /* This UNGET will ensure that we count newlines
b1ac4c66 940 correctly. */
252b5132
RH
941 UNGET (ch2);
942 }
943
944 if (ch2 == EOF)
945 as_warn (_("end of file in multiline comment"));
946
947 ch = ' ';
948 goto recycle;
949 }
800eeca4
JW
950#ifdef DOUBLESLASH_LINE_COMMENTS
951 else if (ch2 == '/')
952 {
953 do
954 {
955 ch = GET ();
956 }
957 while (ch != EOF && !IS_NEWLINE (ch));
958 if (ch == EOF)
959 as_warn ("end of file in comment; newline inserted");
960 state = 0;
961 PUT ('\n');
962 break;
963 }
964#endif
252b5132
RH
965 else
966 {
967 if (ch2 != EOF)
968 UNGET (ch2);
969 if (state == 9 || state == 10)
970 state = 3;
971 PUT (ch);
972 }
973 break;
974
975 case LEX_IS_STRINGQUOTE:
c9c5dcda 976 quotechar = ch;
252b5132
RH
977 if (state == 10)
978 {
204cd129 979 /* Preserve the whitespace in foo "bar". */
252b5132
RH
980 UNGET (ch);
981 state = 3;
982 PUT (' ');
983
984 /* PUT didn't jump out. We could just break, but we
b1ac4c66 985 know what will happen, so optimize a bit. */
252b5132
RH
986 ch = GET ();
987 old_state = 3;
988 }
989 else if (state == 9)
990 old_state = 3;
991 else
992 old_state = state;
993 state = 5;
994 PUT (ch);
995 break;
996
997#ifndef IEEE_STYLE
998 case LEX_IS_ONECHAR_QUOTE:
999 if (state == 10)
1000 {
204cd129 1001 /* Preserve the whitespace in foo 'b'. */
252b5132
RH
1002 UNGET (ch);
1003 state = 3;
1004 PUT (' ');
1005 break;
1006 }
1007 ch = GET ();
1008 if (ch == EOF)
1009 {
1010 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1011 ch = 0;
1012 }
1013 if (ch == '\\')
1014 {
1015 ch = GET ();
1016 if (ch == EOF)
1017 {
1018 as_warn (_("end of file in escape character"));
1019 ch = '\\';
1020 }
1021 else
1022 ch = process_escape (ch);
1023 }
1024 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1025
1026 /* None of these 'x constants for us. We want 'x'. */
1027 if ((ch = GET ()) != '\'')
1028 {
1029#ifdef REQUIRE_CHAR_CLOSE_QUOTE
0e389e77 1030 as_warn (_("missing close quote; (assumed)"));
252b5132
RH
1031#else
1032 if (ch != EOF)
1033 UNGET (ch);
1034#endif
1035 }
1036 if (strlen (out_buf) == 1)
1037 {
1038 PUT (out_buf[0]);
1039 break;
1040 }
1041 if (state == 9)
1042 old_state = 3;
1043 else
1044 old_state = state;
1045 state = -1;
1046 out_string = out_buf;
1047 PUT (*out_string++);
1048 break;
1049#endif
1050
1051 case LEX_IS_COLON:
f28e8eb3 1052#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef 1053 state = 9;
f28e8eb3 1054#else
252b5132
RH
1055 if (state == 9 || state == 10)
1056 state = 3;
1057 else if (state != 3)
1058 state = 1;
f28e8eb3 1059#endif
252b5132
RH
1060 PUT (ch);
1061 break;
1062
1063 case LEX_IS_NEWLINE:
1064 /* Roll out a bunch of newlines from inside comments, etc. */
1065 if (add_newlines)
1066 {
1067 --add_newlines;
1068 UNGET (ch);
1069 }
3ee4defc 1070 /* Fall through. */
252b5132
RH
1071
1072 case LEX_IS_LINE_SEPARATOR:
1073 state = 0;
1074 PUT (ch);
1075 break;
1076
62f65a7b
DB
1077 case LEX_IS_PARALLEL_SEPARATOR:
1078 state = 1;
1079 PUT (ch);
1080 break;
1081
252b5132
RH
1082#ifdef TC_V850
1083 case LEX_IS_DOUBLEDASH_1ST:
30a2b4ef 1084 ch2 = GET ();
252b5132
RH
1085 if (ch2 != '-')
1086 {
1087 UNGET (ch2);
1088 goto de_fault;
1089 }
3ee4defc 1090 /* Read and skip to end of line. */
252b5132
RH
1091 do
1092 {
1093 ch = GET ();
1094 }
1095 while (ch != EOF && ch != '\n');
204cd129 1096
252b5132 1097 if (ch == EOF)
204cd129
NC
1098 as_warn (_("end of file in comment; newline inserted"));
1099
252b5132
RH
1100 state = 0;
1101 PUT ('\n');
1102 break;
3ee4defc 1103#endif
f28e8eb3 1104#ifdef DOUBLEBAR_PARALLEL
252b5132 1105 case LEX_IS_DOUBLEBAR_1ST:
30a2b4ef 1106 ch2 = GET ();
b1ac4c66 1107 UNGET (ch2);
252b5132 1108 if (ch2 != '|')
204cd129
NC
1109 goto de_fault;
1110
b1ac4c66
AM
1111 /* Handle '||' in two states as invoking PUT twice might
1112 result in the first one jumping out of this loop. We'd
1113 then lose track of the state and one '|' char. */
1114 state = 13;
252b5132
RH
1115 PUT ('|');
1116 break;
3ee4defc 1117#endif
252b5132
RH
1118 case LEX_IS_LINE_COMMENT_START:
1119 /* FIXME-someday: The two character comment stuff was badly
1120 thought out. On i386, we want '/' as line comment start
1121 AND we want C style comments. hence this hack. The
1122 whole lexical process should be reworked. xoxorich. */
1123 if (ch == '/')
1124 {
1125 ch2 = GET ();
1126 if (ch2 == '*')
1127 {
1128 old_state = 3;
1129 state = -2;
1130 break;
1131 }
1132 else
1133 {
1134 UNGET (ch2);
1135 }
204cd129 1136 }
252b5132
RH
1137
1138 if (state == 0 || state == 1) /* Only comment at start of line. */
1139 {
1140 int startch;
1141
1142 startch = ch;
1143
1144 do
1145 {
1146 ch = GET ();
1147 }
1148 while (ch != EOF && IS_WHITESPACE (ch));
204cd129 1149
252b5132
RH
1150 if (ch == EOF)
1151 {
1152 as_warn (_("end of file in comment; newline inserted"));
1153 PUT ('\n');
1154 break;
1155 }
204cd129 1156
252b5132
RH
1157 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1158 {
1159 /* Not a cpp line. */
1160 while (ch != EOF && !IS_NEWLINE (ch))
1161 ch = GET ();
1162 if (ch == EOF)
0e389e77 1163 as_warn (_("end of file in comment; newline inserted"));
252b5132
RH
1164 state = 0;
1165 PUT ('\n');
1166 break;
1167 }
3ee4defc 1168 /* Looks like `# 123 "filename"' from cpp. */
252b5132
RH
1169 UNGET (ch);
1170 old_state = 4;
1171 state = -1;
1172 if (scrub_m68k_mri)
1173 out_string = "\tappline ";
1174 else
1175 out_string = "\t.appline ";
1176 PUT (*out_string++);
1177 break;
1178 }
1179
1180#ifdef TC_D10V
1181 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1182 Trap is the only short insn that has a first operand that is
1183 neither register nor label.
1184 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
30a2b4ef
KH
1185 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1186 already LEX_IS_LINE_COMMENT_START. However, it is the
1187 only character in line_comment_chars for d10v, hence we
1188 can recognize it as such. */
252b5132
RH
1189 /* An alternative approach would be to reset the state to 1 when
1190 we see '||', '<-' or '->', but that seems to be overkill. */
30a2b4ef
KH
1191 if (state == 10)
1192 PUT (' ');
252b5132
RH
1193#endif
1194 /* We have a line comment character which is not at the
1195 start of a line. If this is also a normal comment
1196 character, fall through. Otherwise treat it as a default
1197 character. */
1198 if (strchr (tc_comment_chars, ch) == NULL
1199 && (! scrub_m68k_mri
1200 || (ch != '!' && ch != '*')))
1201 goto de_fault;
1202 if (scrub_m68k_mri
1203 && (ch == '!' || ch == '*' || ch == '#')
1204 && state != 1
1205 && state != 10)
1206 goto de_fault;
1207 /* Fall through. */
1208 case LEX_IS_COMMENT_START:
1209#if defined TC_ARM && defined OBJ_ELF
1210 /* On the ARM, `@' is the comment character.
1211 Unfortunately this is also a special character in ELF .symver
30a2b4ef
KH
1212 directives (and .type, though we deal with those another way).
1213 So we check if this line is such a directive, and treat
1214 the character as default if so. This is a hack. */
252b5132
RH
1215 if ((symver_state != NULL) && (*symver_state == 0))
1216 goto de_fault;
4c400d5e
AM
1217#endif
1218#ifdef WARN_COMMENTS
1219 if (!found_comment)
1220 as_where (&found_comment_file, &found_comment);
252b5132
RH
1221#endif
1222 do
1223 {
1224 ch = GET ();
1225 }
1226 while (ch != EOF && !IS_NEWLINE (ch));
1227 if (ch == EOF)
1228 as_warn (_("end of file in comment; newline inserted"));
1229 state = 0;
1230 PUT ('\n');
1231 break;
1232
1233 case LEX_IS_SYMBOL_COMPONENT:
1234 if (state == 10)
1235 {
1236 /* This is a symbol character following another symbol
1237 character, with whitespace in between. We skipped
1238 the whitespace earlier, so output it now. */
1239 UNGET (ch);
1240 state = 3;
1241 PUT (' ');
1242 break;
1243 }
1244
1245 if (state == 3)
1246 state = 9;
1247
1248 /* This is a common case. Quickly copy CH and all the
b1ac4c66 1249 following symbol component or normal characters. */
252b5132
RH
1250 if (to + 1 < toend
1251 && mri_state == NULL
1252#if defined TC_ARM && defined OBJ_ELF
1253 && symver_state == NULL
1254#endif
1255 )
1256 {
1257 char *s;
1258 int len;
1259
1260 for (s = from; s < fromend; s++)
1261 {
1262 int type;
1263
30a2b4ef 1264 ch2 = *(unsigned char *) s;
252b5132
RH
1265 type = lex[ch2];
1266 if (type != 0
1267 && type != LEX_IS_SYMBOL_COMPONENT)
1268 break;
1269 }
204cd129 1270
252b5132 1271 if (s > from)
204cd129
NC
1272 /* Handle the last character normally, for
1273 simplicity. */
1274 --s;
1275
252b5132 1276 len = s - from;
204cd129 1277
252b5132
RH
1278 if (len > (toend - to) - 1)
1279 len = (toend - to) - 1;
204cd129 1280
252b5132
RH
1281 if (len > 0)
1282 {
1283 PUT (ch);
1284 if (len > 8)
1285 {
1286 memcpy (to, from, len);
1287 to += len;
1288 from += len;
1289 }
1290 else
1291 {
1292 switch (len)
1293 {
1294 case 8: *to++ = *from++;
1295 case 7: *to++ = *from++;
1296 case 6: *to++ = *from++;
1297 case 5: *to++ = *from++;
1298 case 4: *to++ = *from++;
1299 case 3: *to++ = *from++;
1300 case 2: *to++ = *from++;
1301 case 1: *to++ = *from++;
1302 }
3ee4defc 1303 }
37b75c0c
AM
1304 if (to >= toend)
1305 goto tofull;
252b5132
RH
1306 ch = GET ();
1307 }
1308 }
1309
1310 /* Fall through. */
1311 default:
1312 de_fault:
1313 /* Some relatively `normal' character. */
1314 if (state == 0)
1315 {
9a124774 1316 state = 11; /* Now seeing label definition. */
252b5132
RH
1317 }
1318 else if (state == 1)
1319 {
9a124774 1320 state = 2; /* Ditto. */
252b5132
RH
1321 }
1322 else if (state == 9)
1323 {
2cdb18a7 1324 if (!IS_SYMBOL_COMPONENT (ch))
252b5132
RH
1325 state = 3;
1326 }
1327 else if (state == 10)
1328 {
c5c834aa
AH
1329 if (ch == '\\')
1330 {
1331 /* Special handling for backslash: a backslash may
1332 be the beginning of a formal parameter (of a
1333 macro) following another symbol character, with
1334 whitespace in between. If that is the case, we
1335 output a space before the parameter. Strictly
1336 speaking, correct handling depends upon what the
1337 macro parameter expands into; if the parameter
1338 expands into something which does not start with
1339 an operand character, then we don't want to keep
1340 the space. We don't have enough information to
1341 make the right choice, so here we are making the
1342 choice which is more likely to be correct. */
1343 PUT (' ');
1344 }
1345
252b5132
RH
1346 state = 3;
1347 }
1348 PUT (ch);
1349 break;
1350 }
1351 }
1352
1353 /*NOTREACHED*/
1354
1355 fromeof:
1356 /* We have reached the end of the input. */
1357 return to - tostart;
1358
1359 tofull:
1360 /* The output buffer is full. Save any input we have not yet
1361 processed. */
1362 if (fromend > from)
1363 {
2b47531b 1364 saved_input = from;
252b5132
RH
1365 saved_input_len = fromend - from;
1366 }
1367 else
2b47531b
ILT
1368 saved_input = NULL;
1369
252b5132
RH
1370 return to - tostart;
1371}
1372
This page took 0.319455 seconds and 4 git commands to generate.