2005-10-10 H.J. Lu <hongjiu.lu@intel.com>
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003
4 Free Software Foundation, Inc.
5
6 This file is part of GAS, the GNU Assembler.
7
8 GAS is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GAS is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GAS; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
27 pair. This needs better error-handling. */
28
29 #include <stdio.h>
30 #include "as.h" /* For BAD_CASE() only. */
31
/* When the compiler does not report __STDC__ as 1 and `const' is not
   already a macro, define `const' to nothing so that the
   const-qualified declarations in this file are still accepted.  */
32 #if (__STDC__ != 1)
33 #ifndef const
34 #define const /* empty */
35 #endif
36 #endif
37
38 #ifdef TC_M68K
39 /* Whether we are scrubbing in m68k MRI mode. This is different from
40 flag_m68k_mri, because the two flags will be affected by the .mri
41 pseudo-op at different times. */
42 static int scrub_m68k_mri;
43
44 /* The pseudo-op which switches in and out of MRI mode. See the
45 comment in do_scrub_chars. */
46 static const char mri_pseudo[] = ".mri 0";
47 #else
/* Without TC_M68K the flag is the constant 0, so every test of
   scrub_m68k_mri in this file is false.  */
48 #define scrub_m68k_mri 0
49 #endif
50
51 #if defined TC_ARM && defined OBJ_ELF
52 /* The pseudo-op for which we need to special-case `@' characters.
53 See the comment in do_scrub_chars. */
54 static const char symver_pseudo[] = ".symver";
/* Position reached in symver_pseudo while do_scrub_chars is matching
   it against the input; NULL when no match is in progress.  */
55 static const char * symver_state;
56 #endif
57
/* Character class table, indexed by character value.  It has static
   storage, so every entry starts out as 0 (no special class);
   do_scrub_begin stores the LEX_IS_* classes defined below.  */
58 static char lex[256];
/* Characters that do_scrub_begin classifies as
   LEX_IS_SYMBOL_COMPONENT.  */
59 static const char symbol_chars[] =
60 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
61
/* Classes stored in lex[].  The value 7 is not assigned to any class
   here.  */
62 #define LEX_IS_SYMBOL_COMPONENT 1
63 #define LEX_IS_WHITESPACE 2
64 #define LEX_IS_LINE_SEPARATOR 3
65 #define LEX_IS_COMMENT_START 4
66 #define LEX_IS_LINE_COMMENT_START 5
67 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
68 #define LEX_IS_STRINGQUOTE 8
69 #define LEX_IS_COLON 9
70 #define LEX_IS_NEWLINE 10
71 #define LEX_IS_ONECHAR_QUOTE 11
72 #ifdef TC_V850
73 #define LEX_IS_DOUBLEDASH_1ST 12
74 #endif
/* TC_M32R turns on the `||' handling guarded by DOUBLEBAR_PARALLEL.  */
75 #ifdef TC_M32R
76 #define DOUBLEBAR_PARALLEL
77 #endif
78 #ifdef DOUBLEBAR_PARALLEL
79 #define LEX_IS_DOUBLEBAR_1ST 13
80 #endif
81 #define LEX_IS_PARALLEL_SEPARATOR 14
/* Class predicates.  C is used directly as an index into lex[], which
   has 256 entries, so C must lie in the range 0..255; in particular
   EOF has to be tested for before any of these is applied.  */
82 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
83 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
84 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
85 #define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
86 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
87 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
88 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
89
/* Forward declaration; the definition appears later in this file.  */
90 static int process_escape (int);
91
92 /* FIXME-soon: The entire lexer/parser thingy should be
93 built statically at compile time rather than dynamically
94 each and every time the assembler is run. xoxorich. */
95
96 void
97 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
98 {
99 const char *p;
100 int c;
101
102 lex[' '] = LEX_IS_WHITESPACE;
103 lex['\t'] = LEX_IS_WHITESPACE;
104 lex['\r'] = LEX_IS_WHITESPACE;
105 lex['\n'] = LEX_IS_NEWLINE;
106 lex[':'] = LEX_IS_COLON;
107
108 #ifdef TC_M68K
109 scrub_m68k_mri = m68k_mri;
110
111 if (! m68k_mri)
112 #endif
113 {
114 lex['"'] = LEX_IS_STRINGQUOTE;
115
116 #if ! defined (TC_HPPA) && ! defined (TC_I370)
117 /* I370 uses single-quotes to delimit integer, float constants. */
118 lex['\''] = LEX_IS_ONECHAR_QUOTE;
119 #endif
120
121 #ifdef SINGLE_QUOTE_STRINGS
122 lex['\''] = LEX_IS_STRINGQUOTE;
123 #endif
124 }
125
126 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
127 in state 5 of do_scrub_chars must be changed. */
128
129 /* Note that these override the previous defaults, e.g. if ';' is a
130 comment char, then it isn't a line separator. */
131 for (p = symbol_chars; *p; ++p)
132 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
133
134 for (c = 128; c < 256; ++c)
135 lex[c] = LEX_IS_SYMBOL_COMPONENT;
136
137 #ifdef tc_symbol_chars
138 /* This macro permits the processor to specify all characters which
139 may appears in an operand. This will prevent the scrubber from
140 discarding meaningful whitespace in certain cases. The i386
141 backend uses this to support prefixes, which can confuse the
142 scrubber as to whether it is parsing operands or opcodes. */
143 for (p = tc_symbol_chars; *p; ++p)
144 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
145 #endif
146
147 /* The m68k backend wants to be able to change comment_chars. */
148 #ifndef tc_comment_chars
149 #define tc_comment_chars comment_chars
150 #endif
151 for (p = tc_comment_chars; *p; p++)
152 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
153
154 for (p = line_comment_chars; *p; p++)
155 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
156
157 for (p = line_separator_chars; *p; p++)
158 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
159
160 #ifdef tc_parallel_separator_chars
161 /* This macro permits the processor to specify all characters which
162 separate parallel insns on the same line. */
163 for (p = tc_parallel_separator_chars; *p; p++)
164 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
165 #endif
166
167 /* Only allow slash-star comments if slash is not in use.
168 FIXME: This isn't right. We should always permit them. */
169 if (lex['/'] == 0)
170 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
171
172 #ifdef TC_M68K
173 if (m68k_mri)
174 {
175 lex['\''] = LEX_IS_STRINGQUOTE;
176 lex[';'] = LEX_IS_COMMENT_START;
177 lex['*'] = LEX_IS_LINE_COMMENT_START;
178 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
179 then it can't be used in an expression. */
180 lex['!'] = LEX_IS_LINE_COMMENT_START;
181 }
182 #endif
183
184 #ifdef TC_V850
185 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
186 #endif
187 #ifdef DOUBLEBAR_PARALLEL
188 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
189 #endif
190 #ifdef TC_D30V
191 /* Must do this is we want VLIW instruction with "->" or "<-". */
192 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
193 #endif
194 }
195
196 /* Saved state of the scrubber. */
/* Current state of the do_scrub_chars state machine; the states are
   listed in the comment at the top of that function.  */
197 static int state;
/* State that do_scrub_chars returns to when states -1, -2 and 5
   finish.  */
198 static int old_state;
/* Next character to emit while in state -1.  */
199 static char *out_string;
/* Receives the decimal text that do_scrub_chars formats for a
   one-character quote constant.  */
200 static char out_buf[20];
/* Count of newlines consumed inside comments and continued strings;
   do_scrub_chars decrements it and re-reads the newline each time it
   handles a newline character.  */
201 static int add_newlines;
/* Input not yet processed when do_scrub_chars returned because the
   output buffer was full, and its length; saved_input is NULL when
   nothing is pending.  */
202 static char *saved_input;
203 static int saved_input_len;
/* Buffer handed to the GET callback of do_scrub_chars.  */
204 static char input_buffer[32 * 1024];
/* Position reached while matching the .mri pseudo-op, or NULL when no
   match is in progress, and the last character that matched.  */
205 static const char *mri_state;
206 static char mri_last_ch;
206 static char mri_last_ch;
207
208 /* Data structure for saving the state of app across #include's. Note that
209 app is called asynchronously to the parsing of the .include's, so our
210 state at the time .include is interpreted is completely unrelated.
211 That's why we have to save it all. */
212
/* Snapshot of the scrubber's file-scope state.  app_push fills one in
   and app_pop copies it back; each member mirrors the static variable
   of the same name.  */
213 struct app_save
214 {
215 int state;
216 int old_state;
217 char * out_string;
218 char out_buf[sizeof (out_buf)];
219 int add_newlines;
/* Heap copy of the pending input made by app_push, or NULL when there
   was none; app_pop frees it.  saved_input_len is only read by app_pop
   when saved_input is not NULL.  */
220 char * saved_input;
221 int saved_input_len;
222 #ifdef TC_M68K
223 int scrub_m68k_mri;
224 #endif
225 const char * mri_state;
226 char mri_last_ch;
227 #if defined TC_ARM && defined OBJ_ELF
228 const char * symver_state;
229 #endif
230 };
231
232 char *
233 app_push (void)
234 {
235 register struct app_save *saved;
236
237 saved = (struct app_save *) xmalloc (sizeof (*saved));
238 saved->state = state;
239 saved->old_state = old_state;
240 saved->out_string = out_string;
241 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
242 saved->add_newlines = add_newlines;
243 if (saved_input == NULL)
244 saved->saved_input = NULL;
245 else
246 {
247 saved->saved_input = xmalloc (saved_input_len);
248 memcpy (saved->saved_input, saved_input, saved_input_len);
249 saved->saved_input_len = saved_input_len;
250 }
251 #ifdef TC_M68K
252 saved->scrub_m68k_mri = scrub_m68k_mri;
253 #endif
254 saved->mri_state = mri_state;
255 saved->mri_last_ch = mri_last_ch;
256 #if defined TC_ARM && defined OBJ_ELF
257 saved->symver_state = symver_state;
258 #endif
259
260 /* do_scrub_begin() is not useful, just wastes time. */
261
262 state = 0;
263 saved_input = NULL;
264
265 return (char *) saved;
266 }
267
268 void
269 app_pop (char *arg)
270 {
271 register struct app_save *saved = (struct app_save *) arg;
272
273 /* There is no do_scrub_end (). */
274 state = saved->state;
275 old_state = saved->old_state;
276 out_string = saved->out_string;
277 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
278 add_newlines = saved->add_newlines;
279 if (saved->saved_input == NULL)
280 saved_input = NULL;
281 else
282 {
283 assert (saved->saved_input_len <= (int) (sizeof input_buffer));
284 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
285 saved_input = input_buffer;
286 saved_input_len = saved->saved_input_len;
287 free (saved->saved_input);
288 }
289 #ifdef TC_M68K
290 scrub_m68k_mri = saved->scrub_m68k_mri;
291 #endif
292 mri_state = saved->mri_state;
293 mri_last_ch = saved->mri_last_ch;
294 #if defined TC_ARM && defined OBJ_ELF
295 symver_state = saved->symver_state;
296 #endif
297
298 free (arg);
299 }
300
/* Translate CH, the character following a backslash in a
   one-character quote, into the character it denotes.  The letters
   b, f, n, r and t map to the corresponding control characters;
   anything else, including the quote characters, stands for itself.

   @@ This uses the host's values for '\n' and friends, which are not
   necessarily the same as the target's.  */

static int
process_escape (int ch)
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* Quotes and every unrecognised character are returned unchanged.  */
  return ch;
}
327
328 /* This function is called to process input characters. The GET
329 parameter is used to retrieve more input characters. GET should
330 set its parameter to point to a buffer, and return the length of
331 the buffer; it should return 0 at end of file. The scrubbed output
332 characters are put into the buffer starting at TOSTART; the TOSTART
333 buffer is TOLEN bytes in length. The function returns the number
334 of scrubbed characters put into TOSTART. This will be TOLEN unless
335 end of file was seen. This function is arranged as a state
336 machine, and saves its state so that it may return at any point.
337 This is the way the old code used to work. */
338
339 int
340 do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
341 {
342 char *to = tostart;
343 char *toend = tostart + tolen;
344 char *from;
345 char *fromend;
346 int fromlen;
347 register int ch, ch2 = 0;
348 /* Character that started the string we're working on. */
349 static char quotechar;
350
351 /*State 0: beginning of normal line
352 1: After first whitespace on line (flush more white)
353 2: After first non-white (opcode) on line (keep 1white)
354 3: after second white on line (into operands) (flush white)
355 4: after putting out a .line, put out digits
356 5: parsing a string, then go to old-state
357 6: putting out \ escape in a "d string.
358 7: After putting out a .appfile, put out string.
359 8: After putting out a .appfile string, flush until newline.
360 9: After seeing symbol char in state 3 (keep 1white after symchar)
361 10: After seeing whitespace in state 9 (keep white before symchar)
362 11: After seeing a symbol character in state 0 (eg a label definition)
363 -1: output string in out_string and go to the state in old_state
364 -2: flush text until a '*' '/' is seen, then go to state old_state
365 #ifdef TC_V850
366 12: After seeing a dash, looking for a second dash as a start
367 of comment.
368 #endif
369 #ifdef DOUBLEBAR_PARALLEL
370 13: After seeing a vertical bar, looking for a second
371 vertical bar as a parallel expression separator.
372 #endif
373 #ifdef TC_IA64
374 14: After seeing a `(' at state 0, looking for a `)' as
375 predicate.
376 15: After seeing a `(' at state 1, looking for a `)' as
377 predicate.
378 #endif
379 */
380
381 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
382 constructs like ``.loc 1 20''. This was turning into ``.loc
383 120''. States 9 and 10 ensure that a space is never dropped in
384 between characters which could appear in an identifier. Ian
385 Taylor, ian@cygnus.com.
386
387 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
388 correctly on the PA (and any other target where colons are optional).
389 Jeff Law, law@cs.utah.edu.
390
391 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
392 get squashed into "cmp r1,r2||trap#1", with the all important space
393 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
394
395 /* This macro gets the next input character. */
396
397 #define GET() \
398 (from < fromend \
399 ? * (unsigned char *) (from++) \
400 : (saved_input = NULL, \
401 fromlen = (*get) (input_buffer, sizeof input_buffer), \
402 from = input_buffer, \
403 fromend = from + fromlen, \
404 (fromlen == 0 \
405 ? EOF \
406 : * (unsigned char *) (from++))))
407
408 /* This macro pushes a character back on the input stream. */
409
410 #define UNGET(uch) (*--from = (uch))
411
412 /* This macro puts a character into the output buffer. If this
413 character fills the output buffer, this macro jumps to the label
414 TOFULL. We use this rather ugly approach because we need to
415 handle two different termination conditions: EOF on the input
416 stream, and a full output buffer. It would be simpler if we
417 always read in the entire input stream before processing it, but
418 I don't want to make such a significant change to the assembler's
419 memory usage. */
420
421 #define PUT(pch) \
422 do \
423 { \
424 *to++ = (pch); \
425 if (to >= toend) \
426 goto tofull; \
427 } \
428 while (0)
429
430 if (saved_input != NULL)
431 {
432 from = saved_input;
433 fromend = from + saved_input_len;
434 }
435 else
436 {
437 fromlen = (*get) (input_buffer, sizeof input_buffer);
438 if (fromlen == 0)
439 return 0;
440 from = input_buffer;
441 fromend = from + fromlen;
442 }
443
444 while (1)
445 {
446 /* The cases in this switch end with continue, in order to
447 branch back to the top of this while loop and generate the
448 next output character in the appropriate state. */
449 switch (state)
450 {
451 case -1:
452 ch = *out_string++;
453 if (*out_string == '\0')
454 {
455 state = old_state;
456 old_state = 3;
457 }
458 PUT (ch);
459 continue;
460
461 case -2:
462 for (;;)
463 {
464 do
465 {
466 ch = GET ();
467
468 if (ch == EOF)
469 {
470 as_warn (_("end of file in comment"));
471 goto fromeof;
472 }
473
474 if (ch == '\n')
475 PUT ('\n');
476 }
477 while (ch != '*');
478
479 while ((ch = GET ()) == '*')
480 ;
481
482 if (ch == EOF)
483 {
484 as_warn (_("end of file in comment"));
485 goto fromeof;
486 }
487
488 if (ch == '/')
489 break;
490
491 UNGET (ch);
492 }
493
494 state = old_state;
495 UNGET (' ');
496 continue;
497
498 case 4:
499 ch = GET ();
500 if (ch == EOF)
501 goto fromeof;
502 else if (ch >= '0' && ch <= '9')
503 PUT (ch);
504 else
505 {
506 while (ch != EOF && IS_WHITESPACE (ch))
507 ch = GET ();
508 if (ch == '"')
509 {
510 UNGET (ch);
511 if (scrub_m68k_mri)
512 out_string = "\n\tappfile ";
513 else
514 out_string = "\n\t.appfile ";
515 old_state = 7;
516 state = -1;
517 PUT (*out_string++);
518 }
519 else
520 {
521 while (ch != EOF && ch != '\n')
522 ch = GET ();
523 state = 0;
524 PUT (ch);
525 }
526 }
527 continue;
528
529 case 5:
530 /* We are going to copy everything up to a quote character,
531 with special handling for a backslash. We try to
532 optimize the copying in the simple case without using the
533 GET and PUT macros. */
534 {
535 char *s;
536 int len;
537
538 for (s = from; s < fromend; s++)
539 {
540 ch = *s;
541 if (ch == '\\'
542 || ch == quotechar
543 || ch == '\n')
544 break;
545 }
546 len = s - from;
547 if (len > toend - to)
548 len = toend - to;
549 if (len > 0)
550 {
551 memcpy (to, from, len);
552 to += len;
553 from += len;
554 }
555 }
556
557 ch = GET ();
558 if (ch == EOF)
559 {
560 as_warn (_("end of file in string; '%c' inserted"), quotechar);
561 state = old_state;
562 UNGET ('\n');
563 PUT (quotechar);
564 }
565 else if (ch == quotechar)
566 {
567 state = old_state;
568 PUT (ch);
569 }
570 #ifndef NO_STRING_ESCAPES
571 else if (ch == '\\')
572 {
573 state = 6;
574 PUT (ch);
575 }
576 #endif
577 else if (scrub_m68k_mri && ch == '\n')
578 {
579 /* Just quietly terminate the string. This permits lines like
580 bne label loop if we haven't reach end yet. */
581 state = old_state;
582 UNGET (ch);
583 PUT ('\'');
584 }
585 else
586 {
587 PUT (ch);
588 }
589 continue;
590
591 case 6:
592 state = 5;
593 ch = GET ();
594 switch (ch)
595 {
596 /* Handle strings broken across lines, by turning '\n' into
597 '\\' and 'n'. */
598 case '\n':
599 UNGET ('n');
600 add_newlines++;
601 PUT ('\\');
602 continue;
603
604 case EOF:
605 as_warn (_("end of file in string; '%c' inserted"), quotechar);
606 PUT (quotechar);
607 continue;
608
609 case '"':
610 case '\\':
611 case 'b':
612 case 'f':
613 case 'n':
614 case 'r':
615 case 't':
616 case 'v':
617 case 'x':
618 case 'X':
619 case '0':
620 case '1':
621 case '2':
622 case '3':
623 case '4':
624 case '5':
625 case '6':
626 case '7':
627 break;
628
629 default:
630 #ifdef ONLY_STANDARD_ESCAPES
631 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
632 #endif
633 break;
634 }
635 PUT (ch);
636 continue;
637
638 case 7:
639 ch = GET ();
640 quotechar = ch;
641 state = 5;
642 old_state = 8;
643 PUT (ch);
644 continue;
645
646 case 8:
647 do
648 ch = GET ();
649 while (ch != '\n' && ch != EOF);
650 if (ch == EOF)
651 goto fromeof;
652 state = 0;
653 PUT (ch);
654 continue;
655
656 #ifdef DOUBLEBAR_PARALLEL
657 case 13:
658 ch = GET ();
659 if (ch != '|')
660 abort ();
661
662 /* Reset back to state 1 and pretend that we are parsing a
663 line from just after the first white space. */
664 state = 1;
665 PUT ('|');
666 continue;
667 #endif
668 }
669
670 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
671
672 /* flushchar: */
673 ch = GET ();
674
675 #ifdef TC_IA64
676 if (ch == '(' && (state == 0 || state == 1))
677 {
678 state += 14;
679 PUT (ch);
680 continue;
681 }
682 else if (state == 14 || state == 15)
683 {
684 if (ch == ')')
685 {
686 state -= 14;
687 PUT (ch);
688 ch = GET ();
689 }
690 else
691 {
692 PUT (ch);
693 continue;
694 }
695 }
696 #endif
697
698 recycle:
699
700 #if defined TC_ARM && defined OBJ_ELF
701 /* We need to watch out for .symver directives. See the comment later
702 in this function. */
703 if (symver_state == NULL)
704 {
705 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
706 symver_state = symver_pseudo + 1;
707 }
708 else
709 {
710 /* We advance to the next state if we find the right
711 character. */
712 if (ch != '\0' && (*symver_state == ch))
713 ++symver_state;
714 else if (*symver_state != '\0')
715 /* We did not get the expected character, or we didn't
716 get a valid terminating character after seeing the
717 entire pseudo-op, so we must go back to the beginning. */
718 symver_state = NULL;
719 else
720 {
721 /* We've read the entire pseudo-op. If this is the end
722 of the line, go back to the beginning. */
723 if (IS_NEWLINE (ch))
724 symver_state = NULL;
725 }
726 }
727 #endif /* TC_ARM && OBJ_ELF */
728
729 #ifdef TC_M68K
730 /* We want to have pseudo-ops which control whether we are in
731 MRI mode or not. Unfortunately, since m68k MRI mode affects
732 the scrubber, that means that we need a special purpose
733 recognizer here. */
734 if (mri_state == NULL)
735 {
736 if ((state == 0 || state == 1)
737 && ch == mri_pseudo[0])
738 mri_state = mri_pseudo + 1;
739 }
740 else
741 {
742 /* We advance to the next state if we find the right
743 character, or if we need a space character and we get any
744 whitespace character, or if we need a '0' and we get a
745 '1' (this is so that we only need one state to handle
746 ``.mri 0'' and ``.mri 1''). */
747 if (ch != '\0'
748 && (*mri_state == ch
749 || (*mri_state == ' '
750 && lex[ch] == LEX_IS_WHITESPACE)
751 || (*mri_state == '0'
752 && ch == '1')))
753 {
754 mri_last_ch = ch;
755 ++mri_state;
756 }
757 else if (*mri_state != '\0'
758 || (lex[ch] != LEX_IS_WHITESPACE
759 && lex[ch] != LEX_IS_NEWLINE))
760 {
761 /* We did not get the expected character, or we didn't
762 get a valid terminating character after seeing the
763 entire pseudo-op, so we must go back to the
764 beginning. */
765 mri_state = NULL;
766 }
767 else
768 {
769 /* We've read the entire pseudo-op. mips_last_ch is
770 either '0' or '1' indicating whether to enter or
771 leave MRI mode. */
772 do_scrub_begin (mri_last_ch == '1');
773 mri_state = NULL;
774
775 /* We continue handling the character as usual. The
776 main gas reader must also handle the .mri pseudo-op
777 to control expression parsing and the like. */
778 }
779 }
780 #endif
781
782 if (ch == EOF)
783 {
784 if (state != 0)
785 {
786 as_warn (_("end of file not at end of a line; newline inserted"));
787 state = 0;
788 PUT ('\n');
789 }
790 goto fromeof;
791 }
792
793 switch (lex[ch])
794 {
795 case LEX_IS_WHITESPACE:
796 do
797 {
798 ch = GET ();
799 }
800 while (ch != EOF && IS_WHITESPACE (ch));
801 if (ch == EOF)
802 goto fromeof;
803
804 if (state == 0)
805 {
806 /* Preserve a single whitespace character at the
807 beginning of a line. */
808 state = 1;
809 UNGET (ch);
810 PUT (' ');
811 break;
812 }
813
814 #ifdef KEEP_WHITE_AROUND_COLON
815 if (lex[ch] == LEX_IS_COLON)
816 {
817 /* Only keep this white if there's no white *after* the
818 colon. */
819 ch2 = GET ();
820 UNGET (ch2);
821 if (!IS_WHITESPACE (ch2))
822 {
823 state = 9;
824 UNGET (ch);
825 PUT (' ');
826 break;
827 }
828 }
829 #endif
830 if (IS_COMMENT (ch)
831 || ch == '/'
832 || IS_LINE_SEPARATOR (ch)
833 || IS_PARALLEL_SEPARATOR (ch))
834 {
835 if (scrub_m68k_mri)
836 {
837 /* In MRI mode, we keep these spaces. */
838 UNGET (ch);
839 PUT (' ');
840 break;
841 }
842 goto recycle;
843 }
844
845 /* If we're in state 2 or 11, we've seen a non-white
846 character followed by whitespace. If the next character
847 is ':', this is whitespace after a label name which we
848 normally must ignore. In MRI mode, though, spaces are
849 not permitted between the label and the colon. */
850 if ((state == 2 || state == 11)
851 && lex[ch] == LEX_IS_COLON
852 && ! scrub_m68k_mri)
853 {
854 state = 1;
855 PUT (ch);
856 break;
857 }
858
859 switch (state)
860 {
861 case 0:
862 state++;
863 goto recycle; /* Punted leading sp */
864 case 1:
865 /* We can arrive here if we leave a leading whitespace
866 character at the beginning of a line. */
867 goto recycle;
868 case 2:
869 state = 3;
870 if (to + 1 < toend)
871 {
872 /* Optimize common case by skipping UNGET/GET. */
873 PUT (' '); /* Sp after opco */
874 goto recycle;
875 }
876 UNGET (ch);
877 PUT (' ');
878 break;
879 case 3:
880 if (scrub_m68k_mri)
881 {
882 /* In MRI mode, we keep these spaces. */
883 UNGET (ch);
884 PUT (' ');
885 break;
886 }
887 goto recycle; /* Sp in operands */
888 case 9:
889 case 10:
890 if (scrub_m68k_mri)
891 {
892 /* In MRI mode, we keep these spaces. */
893 state = 3;
894 UNGET (ch);
895 PUT (' ');
896 break;
897 }
898 state = 10; /* Sp after symbol char */
899 goto recycle;
900 case 11:
901 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
902 state = 1;
903 else
904 {
905 /* We know that ch is not ':', since we tested that
906 case above. Therefore this is not a label, so it
907 must be the opcode, and we've just seen the
908 whitespace after it. */
909 state = 3;
910 }
911 UNGET (ch);
912 PUT (' '); /* Sp after label definition. */
913 break;
914 default:
915 BAD_CASE (state);
916 }
917 break;
918
919 case LEX_IS_TWOCHAR_COMMENT_1ST:
920 ch2 = GET ();
921 if (ch2 == '*')
922 {
923 for (;;)
924 {
925 do
926 {
927 ch2 = GET ();
928 if (ch2 != EOF && IS_NEWLINE (ch2))
929 add_newlines++;
930 }
931 while (ch2 != EOF && ch2 != '*');
932
933 while (ch2 == '*')
934 ch2 = GET ();
935
936 if (ch2 == EOF || ch2 == '/')
937 break;
938
939 /* This UNGET will ensure that we count newlines
940 correctly. */
941 UNGET (ch2);
942 }
943
944 if (ch2 == EOF)
945 as_warn (_("end of file in multiline comment"));
946
947 ch = ' ';
948 goto recycle;
949 }
950 #ifdef DOUBLESLASH_LINE_COMMENTS
951 else if (ch2 == '/')
952 {
953 do
954 {
955 ch = GET ();
956 }
957 while (ch != EOF && !IS_NEWLINE (ch));
958 if (ch == EOF)
959 as_warn ("end of file in comment; newline inserted");
960 state = 0;
961 PUT ('\n');
962 break;
963 }
964 #endif
965 else
966 {
967 if (ch2 != EOF)
968 UNGET (ch2);
969 if (state == 9 || state == 10)
970 state = 3;
971 PUT (ch);
972 }
973 break;
974
975 case LEX_IS_STRINGQUOTE:
976 quotechar = ch;
977 if (state == 10)
978 {
979 /* Preserve the whitespace in foo "bar". */
980 UNGET (ch);
981 state = 3;
982 PUT (' ');
983
984 /* PUT didn't jump out. We could just break, but we
985 know what will happen, so optimize a bit. */
986 ch = GET ();
987 old_state = 3;
988 }
989 else if (state == 9)
990 old_state = 3;
991 else
992 old_state = state;
993 state = 5;
994 PUT (ch);
995 break;
996
997 #ifndef IEEE_STYLE
998 case LEX_IS_ONECHAR_QUOTE:
999 if (state == 10)
1000 {
1001 /* Preserve the whitespace in foo 'b'. */
1002 UNGET (ch);
1003 state = 3;
1004 PUT (' ');
1005 break;
1006 }
1007 ch = GET ();
1008 if (ch == EOF)
1009 {
1010 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1011 ch = 0;
1012 }
1013 if (ch == '\\')
1014 {
1015 ch = GET ();
1016 if (ch == EOF)
1017 {
1018 as_warn (_("end of file in escape character"));
1019 ch = '\\';
1020 }
1021 else
1022 ch = process_escape (ch);
1023 }
1024 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1025
1026 /* None of these 'x constants for us. We want 'x'. */
1027 if ((ch = GET ()) != '\'')
1028 {
1029 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1030 as_warn (_("missing close quote; (assumed)"));
1031 #else
1032 if (ch != EOF)
1033 UNGET (ch);
1034 #endif
1035 }
1036 if (strlen (out_buf) == 1)
1037 {
1038 PUT (out_buf[0]);
1039 break;
1040 }
1041 if (state == 9)
1042 old_state = 3;
1043 else
1044 old_state = state;
1045 state = -1;
1046 out_string = out_buf;
1047 PUT (*out_string++);
1048 break;
1049 #endif
1050
1051 case LEX_IS_COLON:
1052 #ifdef KEEP_WHITE_AROUND_COLON
1053 state = 9;
1054 #else
1055 if (state == 9 || state == 10)
1056 state = 3;
1057 else if (state != 3)
1058 state = 1;
1059 #endif
1060 PUT (ch);
1061 break;
1062
1063 case LEX_IS_NEWLINE:
1064 /* Roll out a bunch of newlines from inside comments, etc. */
1065 if (add_newlines)
1066 {
1067 --add_newlines;
1068 UNGET (ch);
1069 }
1070 /* Fall through. */
1071
1072 case LEX_IS_LINE_SEPARATOR:
1073 state = 0;
1074 PUT (ch);
1075 break;
1076
1077 case LEX_IS_PARALLEL_SEPARATOR:
1078 state = 1;
1079 PUT (ch);
1080 break;
1081
1082 #ifdef TC_V850
1083 case LEX_IS_DOUBLEDASH_1ST:
1084 ch2 = GET ();
1085 if (ch2 != '-')
1086 {
1087 UNGET (ch2);
1088 goto de_fault;
1089 }
1090 /* Read and skip to end of line. */
1091 do
1092 {
1093 ch = GET ();
1094 }
1095 while (ch != EOF && ch != '\n');
1096
1097 if (ch == EOF)
1098 as_warn (_("end of file in comment; newline inserted"));
1099
1100 state = 0;
1101 PUT ('\n');
1102 break;
1103 #endif
1104 #ifdef DOUBLEBAR_PARALLEL
1105 case LEX_IS_DOUBLEBAR_1ST:
1106 ch2 = GET ();
1107 UNGET (ch2);
1108 if (ch2 != '|')
1109 goto de_fault;
1110
1111 /* Handle '||' in two states as invoking PUT twice might
1112 result in the first one jumping out of this loop. We'd
1113 then lose track of the state and one '|' char. */
1114 state = 13;
1115 PUT ('|');
1116 break;
1117 #endif
1118 case LEX_IS_LINE_COMMENT_START:
1119 /* FIXME-someday: The two character comment stuff was badly
1120 thought out. On i386, we want '/' as line comment start
1121 AND we want C style comments. hence this hack. The
1122 whole lexical process should be reworked. xoxorich. */
1123 if (ch == '/')
1124 {
1125 ch2 = GET ();
1126 if (ch2 == '*')
1127 {
1128 old_state = 3;
1129 state = -2;
1130 break;
1131 }
1132 else
1133 {
1134 UNGET (ch2);
1135 }
1136 }
1137
1138 if (state == 0 || state == 1) /* Only comment at start of line. */
1139 {
1140 int startch;
1141
1142 startch = ch;
1143
1144 do
1145 {
1146 ch = GET ();
1147 }
1148 while (ch != EOF && IS_WHITESPACE (ch));
1149
1150 if (ch == EOF)
1151 {
1152 as_warn (_("end of file in comment; newline inserted"));
1153 PUT ('\n');
1154 break;
1155 }
1156
1157 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1158 {
1159 /* Not a cpp line. */
1160 while (ch != EOF && !IS_NEWLINE (ch))
1161 ch = GET ();
1162 if (ch == EOF)
1163 as_warn (_("end of file in comment; newline inserted"));
1164 state = 0;
1165 PUT ('\n');
1166 break;
1167 }
1168 /* Looks like `# 123 "filename"' from cpp. */
1169 UNGET (ch);
1170 old_state = 4;
1171 state = -1;
1172 if (scrub_m68k_mri)
1173 out_string = "\tappline ";
1174 else
1175 out_string = "\t.appline ";
1176 PUT (*out_string++);
1177 break;
1178 }
1179
1180 #ifdef TC_D10V
1181 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1182 Trap is the only short insn that has a first operand that is
1183 neither register nor label.
1184 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1185 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1186 already LEX_IS_LINE_COMMENT_START. However, it is the
1187 only character in line_comment_chars for d10v, hence we
1188 can recognize it as such. */
1189 /* An alternative approach would be to reset the state to 1 when
1190 we see '||', '<'- or '->', but that seems to be overkill. */
1191 if (state == 10)
1192 PUT (' ');
1193 #endif
1194 /* We have a line comment character which is not at the
1195 start of a line. If this is also a normal comment
1196 character, fall through. Otherwise treat it as a default
1197 character. */
1198 if (strchr (tc_comment_chars, ch) == NULL
1199 && (! scrub_m68k_mri
1200 || (ch != '!' && ch != '*')))
1201 goto de_fault;
1202 if (scrub_m68k_mri
1203 && (ch == '!' || ch == '*' || ch == '#')
1204 && state != 1
1205 && state != 10)
1206 goto de_fault;
1207 /* Fall through. */
1208 case LEX_IS_COMMENT_START:
1209 #if defined TC_ARM && defined OBJ_ELF
1210 /* On the ARM, `@' is the comment character.
1211 Unfortunately this is also a special character in ELF .symver
1212 directives (and .type, though we deal with those another way).
1213 So we check if this line is such a directive, and treat
1214 the character as default if so. This is a hack. */
1215 if ((symver_state != NULL) && (*symver_state == 0))
1216 goto de_fault;
1217 #endif
1218 #ifdef WARN_COMMENTS
1219 if (!found_comment)
1220 as_where (&found_comment_file, &found_comment);
1221 #endif
1222 do
1223 {
1224 ch = GET ();
1225 }
1226 while (ch != EOF && !IS_NEWLINE (ch));
1227 if (ch == EOF)
1228 as_warn (_("end of file in comment; newline inserted"));
1229 state = 0;
1230 PUT ('\n');
1231 break;
1232
1233 case LEX_IS_SYMBOL_COMPONENT:
1234 if (state == 10)
1235 {
1236 /* This is a symbol character following another symbol
1237 character, with whitespace in between. We skipped
1238 the whitespace earlier, so output it now. */
1239 UNGET (ch);
1240 state = 3;
1241 PUT (' ');
1242 break;
1243 }
1244
1245 if (state == 3)
1246 state = 9;
1247
1248 /* This is a common case. Quickly copy CH and all the
1249 following symbol component or normal characters. */
1250 if (to + 1 < toend
1251 && mri_state == NULL
1252 #if defined TC_ARM && defined OBJ_ELF
1253 && symver_state == NULL
1254 #endif
1255 )
1256 {
1257 char *s;
1258 int len;
1259
1260 for (s = from; s < fromend; s++)
1261 {
1262 int type;
1263
1264 ch2 = *(unsigned char *) s;
1265 type = lex[ch2];
1266 if (type != 0
1267 && type != LEX_IS_SYMBOL_COMPONENT)
1268 break;
1269 }
1270
1271 if (s > from)
1272 /* Handle the last character normally, for
1273 simplicity. */
1274 --s;
1275
1276 len = s - from;
1277
1278 if (len > (toend - to) - 1)
1279 len = (toend - to) - 1;
1280
1281 if (len > 0)
1282 {
1283 PUT (ch);
1284 if (len > 8)
1285 {
1286 memcpy (to, from, len);
1287 to += len;
1288 from += len;
1289 }
1290 else
1291 {
1292 switch (len)
1293 {
1294 case 8: *to++ = *from++;
1295 case 7: *to++ = *from++;
1296 case 6: *to++ = *from++;
1297 case 5: *to++ = *from++;
1298 case 4: *to++ = *from++;
1299 case 3: *to++ = *from++;
1300 case 2: *to++ = *from++;
1301 case 1: *to++ = *from++;
1302 }
1303 }
1304 if (to >= toend)
1305 goto tofull;
1306 ch = GET ();
1307 }
1308 }
1309
1310 /* Fall through. */
1311 default:
1312 de_fault:
1313 /* Some relatively `normal' character. */
1314 if (state == 0)
1315 {
1316 state = 11; /* Now seeing label definition. */
1317 }
1318 else if (state == 1)
1319 {
1320 state = 2; /* Ditto. */
1321 }
1322 else if (state == 9)
1323 {
1324 if (!IS_SYMBOL_COMPONENT (ch))
1325 state = 3;
1326 }
1327 else if (state == 10)
1328 {
1329 if (ch == '\\')
1330 {
1331 /* Special handling for backslash: a backslash may
1332 be the beginning of a formal parameter (of a
1333 macro) following another symbol character, with
1334 whitespace in between. If that is the case, we
1335 output a space before the parameter. Strictly
1336 speaking, correct handling depends upon what the
1337 macro parameter expands into; if the parameter
1338 expands into something which does not start with
1339 an operand character, then we don't want to keep
1340 the space. We don't have enough information to
1341 make the right choice, so here we are making the
1342 choice which is more likely to be correct. */
1343 PUT (' ');
1344 }
1345
1346 state = 3;
1347 }
1348 PUT (ch);
1349 break;
1350 }
1351 }
1352
1353 /*NOTREACHED*/
1354
1355 fromeof:
1356 /* We have reached the end of the input. */
1357 return to - tostart;
1358
1359 tofull:
1360 /* The output buffer is full. Save any input we have not yet
1361 processed. */
1362 if (fromend > from)
1363 {
1364 saved_input = from;
1365 saved_input_len = fromend - from;
1366 }
1367 else
1368 saved_input = NULL;
1369
1370 return to - tostart;
1371 }
1372
This page took 0.060197 seconds and 4 git commands to generate.