* target.h: Add enum target_waitkind, enum target_signal, and
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
24 pair. This needs better error-handling.
25 */
26
27 #include <stdio.h>
28 #include "as.h" /* For BAD_CASE() only */
29
30 #if (__STDC__ != 1) && !defined(const)
31 #define const /* Nothing */
32 #endif
33
/* Character classification table, indexed by character value.  It is
   filled in by do_scrub_begin (); being static, every entry that
   do_scrub_begin () does not set stays 0, which matches none of the
   LEX_IS_* classes below.  */
static char lex[256];
/* Characters classified as LEX_IS_SYMBOL_COMPONENT by do_scrub_begin ().  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Lexical classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11
/* Classification predicates.  The argument is used directly as an index
   into lex[], so it must be a valid index (0..255); in particular it
   must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
55
56 static int process_escape PARAMS ((int));
57
58 /* FIXME-soon: The entire lexer/parser thingy should be
59 built statically at compile time rather than dynamically
60 each and every time the assembler is run. xoxorich. */
61
62 void
63 do_scrub_begin ()
64 {
65 const char *p;
66
67 lex[' '] = LEX_IS_WHITESPACE;
68 lex['\t'] = LEX_IS_WHITESPACE;
69 lex['\n'] = LEX_IS_NEWLINE;
70 lex[';'] = LEX_IS_LINE_SEPARATOR;
71 lex['"'] = LEX_IS_STRINGQUOTE;
72 #ifndef TC_HPPA
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
74 #endif
75 lex[':'] = LEX_IS_COLON;
76
77
78
79 #ifdef SINGLE_QUOTE_STRINGS
80 lex['\''] = LEX_IS_STRINGQUOTE;
81 #endif
82
83 /* Note that these override the previous defaults, e.g. if ';'
84
85 is a comment char, then it isn't a line separator. */
86 for (p = symbol_chars; *p; ++p)
87 {
88 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
89 } /* declare symbol characters */
90
91 for (p = comment_chars; *p; p++)
92 {
93 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
94 } /* declare comment chars */
95
96 for (p = line_comment_chars; *p; p++)
97 {
98 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
99 } /* declare line comment chars */
100
101 for (p = line_separator_chars; *p; p++)
102 {
103 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
104 } /* declare line separators */
105
106 /* Only allow slash-star comments if slash is not in use */
107 if (lex['/'] == 0)
108 {
109 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
110 }
111 /* FIXME-soon. This is a bad hack but otherwise, we
112 can't do c-style comments when '/' is a line
113 comment char. xoxorich. */
114 if (lex['*'] == 0)
115 {
116 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
117 }
118 } /* do_scrub_begin() */
119
/* Stream used by the file-based scrubber callbacks below.  */
FILE *scrub_file;

/* Input callback for files: return the next character read from
   scrub_file, or EOF.  */
int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}

/* Push-back callback for files: hand CH back to scrub_file so that the
   next scrub_from_file () call returns it.  */
void
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */
134
/* In-memory input for the string-based scrubber callbacks below:
   scrub_string points at the next unread character and
   scrub_last_string points one past the last character.  */
char *scrub_string;
char *scrub_last_string;

/* Input callback for strings: return the next character of the text
   between scrub_string and scrub_last_string, or EOF once it is used up.

   The character is returned as an unsigned char converted to int, the
   same convention getc () uses.  Returning a plain char would make bytes
   above 0x7f negative where char is signed: they would index the lex[]
   table out of bounds and 0xff would be mistaken for EOF.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;
  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push-back callback for strings: store CH just before the current
   position and back up so the next scrub_from_string () returns it.

   As with ungetc (), pushing back EOF does nothing; otherwise a junk
   byte would be written into the buffer and later read back as input.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;
  *--scrub_string = ch;
}				/* scrub_to_string() */
150
/* Saved state of the scrubber */
/* Current state of the do_scrub_next_char () state machine; the states
   are listed at the top of that function.  */
static int state;
/* State that do_scrub_next_char () returns to when it leaves one of the
   states -2, -1 or 5.  */
static int old_state;
/* Next character of literal text still to be emitted in state -1.  */
static char *out_string;
/* Receives the decimal text generated for a one-character quote.  */
static char out_buf[20];
/* Count of newlines consumed inside slash-star comments and continued
   strings; each one makes a later input newline be emitted twice
   (presumably to keep output line numbers in step with the input).  */
static int add_newlines = 0;
157
/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* Each member holds a copy of the file-scope variable of the same name;
   app_push () fills the structure in and app_pop () copies it back.  */
struct app_save
{
  int state;
  int old_state;
  char *out_string;
  /* Same size as the file-scope out_buf so it can be copied whole.  */
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
174
175 char *
176 app_push ()
177 {
178 register struct app_save *saved;
179
180 saved = (struct app_save *) xmalloc (sizeof (*saved));
181 saved->state = state;
182 saved->old_state = old_state;
183 saved->out_string = out_string;
184 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
185 saved->add_newlines = add_newlines;
186 saved->scrub_string = scrub_string;
187 saved->scrub_last_string = scrub_last_string;
188 saved->scrub_file = scrub_file;
189
190 /* do_scrub_begin() is not useful, just wastes time. */
191 return (char *) saved;
192 }
193
194 void
195 app_pop (arg)
196 char *arg;
197 {
198 register struct app_save *saved = (struct app_save *) arg;
199
200 /* There is no do_scrub_end (). */
201 state = saved->state;
202 old_state = saved->old_state;
203 out_string = saved->out_string;
204 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
205 add_newlines = saved->add_newlines;
206 scrub_string = saved->scrub_string;
207 scrub_last_string = saved->scrub_last_string;
208 scrub_file = saved->scrub_file;
209
210 free (arg);
211 } /* app_pop() */
212
/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  The letters b, f, n, r and t map to
   the corresponding control characters; every other value (including
   the two quote characters) is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* Quotes and anything unrecognized stand for themselves.  */
  return ch;
}
239 int
240 do_scrub_next_char (get, unget)
241 int (*get) ();
242 void (*unget) ();
243 {
244 /*State 0: beginning of normal line
245 1: After first whitespace on line (flush more white)
246 2: After first non-white (opcode) on line (keep 1white)
247 3: after second white on line (into operands) (flush white)
248 4: after putting out a .line, put out digits
249 5: parsing a string, then go to old-state
250 6: putting out \ escape in a "d string.
251 7: After putting out a .appfile, put out string.
252 8: After putting out a .appfile string, flush until newline.
253 9: After seeing symbol char in state 3 (keep 1white after symchar)
254 10: After seeing whitespace in state 9 (keep white before symchar)
255 -1: output string in out_string and go to the state in old_state
256 -2: flush text until a '*' '/' is seen, then go to state old_state
257 */
258
259 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
260 constructs like ``.loc 1 20''. This was turning into ``.loc
261 120''. States 9 and 10 ensure that a space is never dropped in
262 between characters which could appear in a identifier. Ian
263 Taylor, ian@cygnus.com. */
264
265 register int ch, ch2 = 0;
266 int not_cpp_line = 0;
267
268 switch (state)
269 {
270 case -1:
271 ch = *out_string++;
272 if (*out_string == 0)
273 {
274 state = old_state;
275 old_state = 3;
276 }
277 return ch;
278
279 case -2:
280 for (;;)
281 {
282 do
283 {
284 ch = (*get) ();
285 }
286 while (ch != EOF && ch != '\n' && ch != '*');
287 if (ch == '\n' || ch == EOF)
288 return ch;
289
290 /* At this point, ch must be a '*' */
291 while ((ch = (*get) ()) == '*')
292 {
293 ;
294 }
295 if (ch == EOF || ch == '/')
296 break;
297 (*unget) (ch);
298 }
299 state = old_state;
300 return ' ';
301
302 case 4:
303 ch = (*get) ();
304 if (ch == EOF || (ch >= '0' && ch <= '9'))
305 return ch;
306 else
307 {
308 while (ch != EOF && IS_WHITESPACE (ch))
309 ch = (*get) ();
310 if (ch == '"')
311 {
312 (*unget) (ch);
313 out_string = "\n\t.appfile ";
314 old_state = 7;
315 state = -1;
316 return *out_string++;
317 }
318 else
319 {
320 while (ch != EOF && ch != '\n')
321 ch = (*get) ();
322 state = 0;
323 return ch;
324 }
325 }
326
327 case 5:
328 ch = (*get) ();
329 if (lex[ch] == LEX_IS_STRINGQUOTE)
330 {
331 state = old_state;
332 return ch;
333 }
334 else if (ch == '\\')
335 {
336 state = 6;
337 return ch;
338 }
339 else if (ch == EOF)
340 {
341 as_warn ("End of file in string: inserted '\"'");
342 state = old_state;
343 (*unget) ('\n');
344 return '"';
345 }
346 else
347 {
348 return ch;
349 }
350
351 case 6:
352 state = 5;
353 ch = (*get) ();
354 switch (ch)
355 {
356 /* Handle strings broken across lines, by turning '\n' into
357 '\\' and 'n'. */
358 case '\n':
359 (*unget) ('n');
360 add_newlines++;
361 return '\\';
362
363 case '"':
364 case '\\':
365 case 'b':
366 case 'f':
367 case 'n':
368 case 'r':
369 case 't':
370 #ifdef BACKSLASH_V
371 case 'v':
372 #endif /* BACKSLASH_V */
373 case 'x':
374 case 'X':
375 case '0':
376 case '1':
377 case '2':
378 case '3':
379 case '4':
380 case '5':
381 case '6':
382 case '7':
383 break;
384 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
385 default:
386 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
387 break;
388 #else /* ONLY_STANDARD_ESCAPES */
389 default:
390 /* Accept \x as x for any x */
391 break;
392 #endif /* ONLY_STANDARD_ESCAPES */
393
394 case EOF:
395 as_warn ("End of file in string: '\"' inserted");
396 return '"';
397 }
398 return ch;
399
400 case 7:
401 ch = (*get) ();
402 state = 5;
403 old_state = 8;
404 return ch;
405
406 case 8:
407 do
408 ch = (*get) ();
409 while (ch != '\n');
410 state = 0;
411 return ch;
412 }
413
414 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
415
416 /* flushchar: */
417 ch = (*get) ();
418 recycle:
419 if (ch == EOF)
420 {
421 if (state != 0)
422 as_warn ("End of file not at end of a line: Newline inserted.");
423 return ch;
424 }
425
426 switch (lex[ch])
427 {
428 case LEX_IS_WHITESPACE:
429 do
430 /* Preserve a single whitespace character at the beginning of
431 a line. */
432 if (state == 0)
433 {
434 state = 1;
435 return ch;
436 }
437 else
438 ch = (*get) ();
439 while (ch != EOF && IS_WHITESPACE (ch));
440 if (ch == EOF)
441 return ch;
442
443 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
444 {
445 /* cpp never outputs a leading space before the #, so try to
446 avoid being confused. */
447 not_cpp_line = 1;
448 goto recycle;
449 }
450
451 /* If we're in state 2, we've seen a non-white
452 character followed by whitespace. If the next
453 character is ':', this is whitespace after a label
454 name which we can ignore. */
455 if (state == 2 && lex[ch] == LEX_IS_COLON)
456 {
457 state = 0;
458 return ch;
459 }
460
461 #if defined (LABELS_WITHOUT_COLONS) || defined (MRI)
462 /* Like above, but handles case where labels are not
463 required to have colons (and therefore must be identified
464 by their *position* in the input stream.) For a testcase
465 see hppa/more.parse/labelbug.s.
466
467 This also has the effect of sometimes leaving a whitespace
468 before a newline. Instead of trying to rework this horribly
469 broken and hairy code I'm just going to zap the extra space here. */
470 if (state == 2 && lex[ch] == LEX_IS_SYMBOL_COMPONENT)
471 {
472 (*unget) (ch);
473 return ' ';
474 }
475
476 /* Don't emit a space before a newline. */
477 if (state == 2 && lex[ch] == LEX_IS_NEWLINE)
478 {
479 state = 0;
480 return ch;
481 }
482 #endif
483
484 switch (state)
485 {
486 case 0:
487 state++;
488 goto recycle; /* Punted leading sp */
489 case 1:
490 /* We can arrive here if we leave a leading whitespace character
491 at the beginning of a line. */
492 goto recycle;
493 case 2:
494 state = 3;
495 (*unget) (ch);
496 return ' '; /* Sp after opco */
497 case 3:
498 goto recycle; /* Sp in operands */
499 case 9:
500 case 10:
501 state = 10; /* Sp after symbol char */
502 goto recycle;
503 default:
504 BAD_CASE (state);
505 }
506 break;
507
508 case LEX_IS_TWOCHAR_COMMENT_1ST:
509 ch2 = (*get) ();
510 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
511 {
512 for (;;)
513 {
514 do
515 {
516 ch2 = (*get) ();
517 if (ch2 != EOF && IS_NEWLINE (ch2))
518 add_newlines++;
519 }
520 while (ch2 != EOF &&
521 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
522
523 while (ch2 != EOF &&
524 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
525 {
526 ch2 = (*get) ();
527 }
528
529 if (ch2 == EOF
530 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
531 break;
532 (*unget) (ch);
533 }
534 if (ch2 == EOF)
535 as_warn ("End of file in multiline comment");
536
537 ch = ' ';
538 goto recycle;
539 }
540 else
541 {
542 if (ch2 != EOF)
543 (*unget) (ch2);
544 if (state == 9 || state == 10)
545 state = 3;
546 return ch;
547 }
548 break;
549
550 case LEX_IS_STRINGQUOTE:
551 if (state == 9 || state == 10)
552 old_state = 3;
553 else
554 old_state = state;
555 state = 5;
556 return ch;
557 #ifndef MRI
558 #ifndef IEEE_STYLE
559 case LEX_IS_ONECHAR_QUOTE:
560 ch = (*get) ();
561 if (ch == EOF)
562 {
563 as_warn ("End-of-file after a one-character quote; \\000 inserted");
564 ch = 0;
565 }
566 if (ch == '\\')
567 {
568 ch = (*get) ();
569 ch = process_escape (ch);
570 }
571 sprintf (out_buf, "%d", (int) (unsigned char) ch);
572
573
574 /* None of these 'x constants for us. We want 'x'. */
575 if ((ch = (*get) ()) != '\'')
576 {
577 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
578 as_warn ("Missing close quote: (assumed)");
579 #else
580 (*unget) (ch);
581 #endif
582 }
583 if (strlen (out_buf) == 1)
584 {
585 return out_buf[0];
586 }
587 if (state == 9 || state == 10)
588 old_state = 3;
589 else
590 old_state = state;
591 state = -1;
592 out_string = out_buf;
593 return *out_string++;
594 #endif
595 #endif
596 case LEX_IS_COLON:
597 if (state == 9 || state == 10)
598 state = 3;
599 else if (state != 3)
600 state = 0;
601 return ch;
602
603 case LEX_IS_NEWLINE:
604 /* Roll out a bunch of newlines from inside comments, etc. */
605 if (add_newlines)
606 {
607 --add_newlines;
608 (*unget) (ch);
609 }
610 /* fall thru into... */
611
612 case LEX_IS_LINE_SEPARATOR:
613 state = 0;
614 return ch;
615
616 case LEX_IS_LINE_COMMENT_START:
617 if (state == 0) /* Only comment at start of line. */
618 {
619 /* FIXME-someday: The two character comment stuff was badly
620 thought out. On i386, we want '/' as line comment start
621 AND we want C style comments. hence this hack. The
622 whole lexical process should be reworked. xoxorich. */
623 if (ch == '/')
624 {
625 ch2 = (*get) ();
626 if (ch2 == '*')
627 {
628 state = -2;
629 return (do_scrub_next_char (get, unget));
630 }
631 else
632 {
633 (*unget) (ch2);
634 }
635 } /* bad hack */
636
637 if (ch != '#')
638 not_cpp_line = 1;
639
640 do
641 ch = (*get) ();
642 while (ch != EOF && IS_WHITESPACE (ch));
643 if (ch == EOF)
644 {
645 as_warn ("EOF in comment: Newline inserted");
646 return '\n';
647 }
648 if (ch < '0' || ch > '9' || not_cpp_line)
649 {
650 /* Non-numerics: Eat whole comment line */
651 while (ch != EOF && !IS_NEWLINE (ch))
652 ch = (*get) ();
653 if (ch == EOF)
654 as_warn ("EOF in Comment: Newline inserted");
655 state = 0;
656 return '\n';
657 }
658 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
659 (*unget) (ch);
660 old_state = 4;
661 state = -1;
662 out_string = "\t.appline ";
663 return *out_string++;
664 }
665
666 /* We have a line comment character which is not at the start of
667 a line. If this is also a normal comment character, fall
668 through. Otherwise treat it as a default character. */
669 if (strchr (comment_chars, ch) == NULL)
670 goto de_fault;
671 /* Fall through. */
672 case LEX_IS_COMMENT_START:
673 do
674 ch = (*get) ();
675 while (ch != EOF && !IS_NEWLINE (ch));
676 if (ch == EOF)
677 as_warn ("EOF in comment: Newline inserted");
678 state = 0;
679 return '\n';
680
681 case LEX_IS_SYMBOL_COMPONENT:
682 if (state == 10)
683 {
684 /* This is a symbol character following another symbol
685 character, with whitespace in between. We skipped the
686 whitespace earlier, so output it now. */
687 (*unget) (ch);
688 state = 3;
689 return ' ';
690 }
691 if (state == 3)
692 state = 9;
693 /* Fall through. */
694 default:
695 de_fault:
696 /* Some relatively `normal' character. */
697 if (state == 0)
698 {
699 state = 2; /* Now seeing opcode */
700 return ch;
701 }
702 else if (state == 1)
703 {
704 state = 2; /* Ditto */
705 return ch;
706 }
707 else if (state == 9)
708 {
709 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
710 state = 3;
711 return ch;
712 }
713 else if (state == 10)
714 {
715 state = 3;
716 return ch;
717 }
718 else
719 {
720 return ch; /* Opcode or operands already */
721 }
722 }
723 return -1;
724 }
725
#ifdef TEST

/* Stand-alone test harness: scrub standard input onto standard output.
   The character sets below stand in for the ones this file otherwise
   expects to be defined elsewhere.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  do_scrub_begin ();
  /* The file callbacks read from and push back to scrub_file.  */
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal warning routine: print STR and a newline on stderr.  STR is
   printed verbatim, so any printf-style directives in it are not expanded.
   NOTE(review): the signature is left as it was; confirm that it agrees
   with whatever declaration of as_warn "as.h" provides.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
748
749 /* end of app.c */
This page took 0.043401 seconds and 4 git commands to generate.