* hppa-tdep.c (frame_saved_pc): Don't try to dig a return pointer
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
24 pair. This needs better error-handling. */
25
26 #include <stdio.h>
27 #include "as.h" /* For BAD_CASE() only */
28
29 #if (__STDC__ != 1)
30 #ifndef const
31 #define const /* empty */
32 #endif
33 #endif
34
/* Character-class table used by the scrubber, indexed by character code.
   Every entry starts out as zero (no special class); do_scrub_begin ()
   fills in the classes below.  */
static char lex[256];

/* Characters that are marked LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Classification predicates.  The argument is converted to unsigned char
   before it is used as a subscript, so a plain `char' holding a value
   with the high bit set (negative where char is signed) still selects an
   entry inside the 256-element table instead of reading before it.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
56
57 static int process_escape PARAMS ((int));
58
59 /* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
62
63 void
64 do_scrub_begin ()
65 {
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex['"'] = LEX_IS_STRINGQUOTE;
73 #ifndef TC_HPPA
74 lex['\''] = LEX_IS_ONECHAR_QUOTE;
75 #endif
76 lex[':'] = LEX_IS_COLON;
77
78
79
80 #ifdef SINGLE_QUOTE_STRINGS
81 lex['\''] = LEX_IS_STRINGQUOTE;
82 #endif
83
84 /* Note that these override the previous defaults, e.g. if ';' is a
85 comment char, then it isn't a line separator. */
86 for (p = symbol_chars; *p; ++p)
87 {
88 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
89 } /* declare symbol characters */
90
91 for (p = comment_chars; *p; p++)
92 {
93 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
94 } /* declare comment chars */
95
96 for (p = line_comment_chars; *p; p++)
97 {
98 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
99 } /* declare line comment chars */
100
101 for (p = line_separator_chars; *p; p++)
102 {
103 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
104 } /* declare line separators */
105
106 /* Only allow slash-star comments if slash is not in use */
107 if (lex['/'] == 0)
108 {
109 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
110 }
111 /* FIXME-soon. This is a bad hack but otherwise, we can't do
112 c-style comments when '/' is a line comment char. xoxorich. */
113 if (lex['*'] == 0)
114 {
115 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
116 }
117 } /* do_scrub_begin() */
118
/* Stream the scrubber reads from when its input is a file.  */
FILE *scrub_file;

/* Input callback for do_scrub_next_char (): return the next character
   read from scrub_file, or EOF when the stream has nothing more.  */
int
scrub_from_file ()
{
  int next = fgetc (scrub_file);

  return next;
}
126
127 void
128 scrub_to_file (ch)
129 int ch;
130 {
131 ungetc (ch, scrub_file);
132 } /* scrub_to_file() */
133
/* Cursor into, and end of, the in-memory text being scrubbed.  Input is
   exhausted once scrub_string reaches scrub_last_string.  */
char *scrub_string;
char *scrub_last_string;

/* Input callback for do_scrub_next_char (): return the next character of
   the in-memory text and advance the cursor, or EOF once the cursor has
   reached scrub_last_string.

   The byte is returned as an unsigned char converted to int, the same
   convention getc () uses.  Returning the plain char would yield a
   negative value for bytes with the high bit set where char is signed;
   such a value is unusable as a lex[] subscript and, for the byte 0xFF,
   cannot be told apart from EOF.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */
142
143 void
144 scrub_to_string (ch)
145 int ch;
146 {
147 *--scrub_string = ch;
148 } /* scrub_to_string() */
149
/* Scrubber state that persists between calls to do_scrub_next_char ().  */

/* Current state number of the scrubber's state machine.  */
static int state;

/* State to switch to once the current temporary state is finished.  */
static int old_state;

/* Next character of the pending text that is emitted in state -1.  */
static char *out_string;

/* Scratch buffer; receives the decimal text of a one-character quote.  */
static char out_buf[20];

/* Count kept of newlines consumed inside comments and continued strings;
   decremented again when a newline is re-read on output.  */
static int add_newlines = 0;

/* Everything that has to be saved and restored around an #include.
   app is called asynchronously to the parsing of the .include's, so the
   scrubber's state at the time a .include is interpreted is completely
   unrelated to it.  That is why all of it is saved: one member per
   variable above, plus the three input-source variables.  */
struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
173
174 char *
175 app_push ()
176 {
177 register struct app_save *saved;
178
179 saved = (struct app_save *) xmalloc (sizeof (*saved));
180 saved->state = state;
181 saved->old_state = old_state;
182 saved->out_string = out_string;
183 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
184 saved->add_newlines = add_newlines;
185 saved->scrub_string = scrub_string;
186 saved->scrub_last_string = scrub_last_string;
187 saved->scrub_file = scrub_file;
188
189 /* do_scrub_begin() is not useful, just wastes time. */
190 return (char *) saved;
191 }
192
193 void
194 app_pop (arg)
195 char *arg;
196 {
197 register struct app_save *saved = (struct app_save *) arg;
198
199 /* There is no do_scrub_end (). */
200 state = saved->state;
201 old_state = saved->old_state;
202 out_string = saved->out_string;
203 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
204 add_newlines = saved->add_newlines;
205 scrub_string = saved->scrub_string;
206 scrub_last_string = saved->scrub_last_string;
207 scrub_file = saved->scrub_file;
208
209 free (arg);
210 } /* app_pop() */
211
/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  The letters b, f, n, r and t map to
   the matching C control characters; a single or double quote maps to
   itself.  Any other value is returned unchanged.  */
static int
process_escape (ch)
     int ch;
{
  /* letters[i] is the escape letter, values[i] the character it yields.  */
  static const char letters[] = "bfnrt'\"";
  static const char values[] = "\b\f\n\r\t'\"";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    {
      if (ch == letters[i])
        return values[i];
    }

  return ch;
}
238 int
239 do_scrub_next_char (get, unget)
240 int (*get) ();
241 void (*unget) ();
242 {
243 /*State 0: beginning of normal line
244 1: After first whitespace on line (flush more white)
245 2: After first non-white (opcode) on line (keep 1white)
246 3: after second white on line (into operands) (flush white)
247 4: after putting out a .line, put out digits
248 5: parsing a string, then go to old-state
249 6: putting out \ escape in a "d string.
250 7: After putting out a .appfile, put out string.
251 8: After putting out a .appfile string, flush until newline.
252 9: After seeing symbol char in state 3 (keep 1white after symchar)
253 10: After seeing whitespace in state 9 (keep white before symchar)
254 11: After seeing a symbol character in state 0 (eg a label definition)
255 -1: output string in out_string and go to the state in old_state
256 -2: flush text until a '*' '/' is seen, then go to state old_state
257 */
258
259 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
260 constructs like ``.loc 1 20''. This was turning into ``.loc
261 120''. States 9 and 10 ensure that a space is never dropped in
262 between characters which could appear in a identifier. Ian
263 Taylor, ian@cygnus.com.
264
265 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
266 correctly on the PA (and any other target where colons are optional).
267 Jeff Law, law@cs.utah.edu. */
268
269 /* This is purely an optimization hack, and relies on gcc's inlining
270 capability. */
271 #if defined (__GNUC__) && defined (__OPTIMIZE__)
272 #define GET() (get == scrub_from_file ? scrub_from_file () : (*get) ())
273 #else
274 #define GET() ((*get) ())
275 #endif
276
277 register int ch, ch2 = 0;
278 int not_cpp_line = 0;
279
280 switch (state)
281 {
282 case -1:
283 ch = *out_string++;
284 if (*out_string == 0)
285 {
286 state = old_state;
287 old_state = 3;
288 }
289 return ch;
290
291 case -2:
292 for (;;)
293 {
294 do
295 {
296 ch = GET ();
297 }
298 while (ch != EOF && ch != '\n' && ch != '*');
299 if (ch == '\n' || ch == EOF)
300 return ch;
301
302 /* At this point, ch must be a '*' */
303 while ((ch = GET ()) == '*')
304 {
305 ;
306 }
307 if (ch == EOF || ch == '/')
308 break;
309 (*unget) (ch);
310 }
311 state = old_state;
312 return ' ';
313
314 case 4:
315 ch = GET ();
316 if (ch == EOF || (ch >= '0' && ch <= '9'))
317 return ch;
318 else
319 {
320 while (ch != EOF && IS_WHITESPACE (ch))
321 ch = GET ();
322 if (ch == '"')
323 {
324 (*unget) (ch);
325 out_string = "\n\t.appfile ";
326 old_state = 7;
327 state = -1;
328 return *out_string++;
329 }
330 else
331 {
332 while (ch != EOF && ch != '\n')
333 ch = GET ();
334 state = 0;
335 return ch;
336 }
337 }
338
339 case 5:
340 ch = GET ();
341 if (lex[ch] == LEX_IS_STRINGQUOTE)
342 {
343 state = old_state;
344 return ch;
345 }
346 #ifndef NO_STRING_ESCAPES
347 else if (ch == '\\')
348 {
349 state = 6;
350 return ch;
351 }
352 #endif
353 else if (ch == EOF)
354 {
355 as_warn ("End of file in string: inserted '\"'");
356 state = old_state;
357 (*unget) ('\n');
358 return '"';
359 }
360 else
361 {
362 return ch;
363 }
364
365 case 6:
366 state = 5;
367 ch = GET ();
368 switch (ch)
369 {
370 /* Handle strings broken across lines, by turning '\n' into
371 '\\' and 'n'. */
372 case '\n':
373 (*unget) ('n');
374 add_newlines++;
375 return '\\';
376
377 case '"':
378 case '\\':
379 case 'b':
380 case 'f':
381 case 'n':
382 case 'r':
383 case 't':
384 #ifdef BACKSLASH_V
385 case 'v':
386 #endif /* BACKSLASH_V */
387 case 'x':
388 case 'X':
389 case '0':
390 case '1':
391 case '2':
392 case '3':
393 case '4':
394 case '5':
395 case '6':
396 case '7':
397 break;
398 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
399 default:
400 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
401 break;
402 #else /* ONLY_STANDARD_ESCAPES */
403 default:
404 /* Accept \x as x for any x */
405 break;
406 #endif /* ONLY_STANDARD_ESCAPES */
407
408 case EOF:
409 as_warn ("End of file in string: '\"' inserted");
410 return '"';
411 }
412 return ch;
413
414 case 7:
415 ch = GET ();
416 state = 5;
417 old_state = 8;
418 return ch;
419
420 case 8:
421 do
422 ch = GET ();
423 while (ch != '\n');
424 state = 0;
425 return ch;
426 }
427
428 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
429
430 /* flushchar: */
431 ch = GET ();
432 recycle:
433 if (ch == EOF)
434 {
435 if (state != 0)
436 {
437 as_warn ("End of file not at end of a line: Newline inserted.");
438 state = 0;
439 return '\n';
440 }
441 return ch;
442 }
443
444 switch (lex[ch])
445 {
446 case LEX_IS_WHITESPACE:
447 do
448 /* Preserve a single whitespace character at the beginning of
449 a line. */
450 if (state == 0)
451 {
452 state = 1;
453 return ch;
454 }
455 else
456 ch = GET ();
457 while (ch != EOF && IS_WHITESPACE (ch));
458 if (ch == EOF)
459 return ch;
460
461 if (IS_COMMENT (ch)
462 || (state == 0 && IS_LINE_COMMENT (ch))
463 || ch == '/'
464 || IS_LINE_SEPARATOR (ch))
465 {
466 /* cpp never outputs a leading space before the #, so try to
467 avoid being confused. */
468 not_cpp_line = 1;
469 goto recycle;
470 }
471 #ifdef MRI
472 (*unget) (ch); /* Put back */
473 return ' '; /* Always return one space at start of line */
474 #endif
475
476 /* If we're in state 2 or 11, we've seen a non-white character
477 followed by whitespace. If the next character is ':', this
478 is whitespace after a label name which we *must* ignore. */
479 if ((state == 2 || state == 11) && lex[ch] == LEX_IS_COLON)
480 {
481 state = 1;
482 return ch;
483 }
484
485 switch (state)
486 {
487 case 0:
488 state++;
489 goto recycle; /* Punted leading sp */
490 case 1:
491 /* We can arrive here if we leave a leading whitespace character
492 at the beginning of a line. */
493 goto recycle;
494 case 2:
495 state = 3;
496 (*unget) (ch);
497 return ' '; /* Sp after opco */
498 case 3:
499 goto recycle; /* Sp in operands */
500 case 9:
501 case 10:
502 state = 10; /* Sp after symbol char */
503 goto recycle;
504 case 11:
505 state = 1;
506 (*unget) (ch);
507 return ' '; /* Sp after label definition. */
508 default:
509 BAD_CASE (state);
510 }
511 break;
512
513 case LEX_IS_TWOCHAR_COMMENT_1ST:
514 ch2 = GET ();
515 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
516 {
517 for (;;)
518 {
519 do
520 {
521 ch2 = GET ();
522 if (ch2 != EOF && IS_NEWLINE (ch2))
523 add_newlines++;
524 }
525 while (ch2 != EOF &&
526 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
527
528 while (ch2 != EOF &&
529 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
530 {
531 ch2 = GET ();
532 }
533
534 if (ch2 == EOF
535 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
536 break;
537 (*unget) (ch);
538 }
539 if (ch2 == EOF)
540 as_warn ("End of file in multiline comment");
541
542 ch = ' ';
543 goto recycle;
544 }
545 else
546 {
547 if (ch2 != EOF)
548 (*unget) (ch2);
549 if (state == 9 || state == 10)
550 state = 3;
551 return ch;
552 }
553 break;
554
555 case LEX_IS_STRINGQUOTE:
556 if (state == 9 || state == 10)
557 old_state = 3;
558 else
559 old_state = state;
560 state = 5;
561 return ch;
562 #ifndef MRI
563 #ifndef IEEE_STYLE
564 case LEX_IS_ONECHAR_QUOTE:
565 ch = GET ();
566 if (ch == EOF)
567 {
568 as_warn ("End-of-file after a one-character quote; \\000 inserted");
569 ch = 0;
570 }
571 if (ch == '\\')
572 {
573 ch = GET ();
574 ch = process_escape (ch);
575 }
576 sprintf (out_buf, "%d", (int) (unsigned char) ch);
577
578
579 /* None of these 'x constants for us. We want 'x'. */
580 if ((ch = GET ()) != '\'')
581 {
582 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
583 as_warn ("Missing close quote: (assumed)");
584 #else
585 (*unget) (ch);
586 #endif
587 }
588 if (strlen (out_buf) == 1)
589 {
590 return out_buf[0];
591 }
592 if (state == 9 || state == 10)
593 old_state = 3;
594 else
595 old_state = state;
596 state = -1;
597 out_string = out_buf;
598 return *out_string++;
599 #endif
600 #endif
601 case LEX_IS_COLON:
602 if (state == 9 || state == 10)
603 state = 3;
604 else if (state != 3)
605 state = 1;
606 return ch;
607
608 case LEX_IS_NEWLINE:
609 /* Roll out a bunch of newlines from inside comments, etc. */
610 if (add_newlines)
611 {
612 --add_newlines;
613 (*unget) (ch);
614 }
615 /* fall thru into... */
616
617 case LEX_IS_LINE_SEPARATOR:
618 state = 0;
619 return ch;
620
621 case LEX_IS_LINE_COMMENT_START:
622 if (state == 0) /* Only comment at start of line. */
623 {
624 /* FIXME-someday: The two character comment stuff was badly
625 thought out. On i386, we want '/' as line comment start
626 AND we want C style comments. hence this hack. The
627 whole lexical process should be reworked. xoxorich. */
628 if (ch == '/')
629 {
630 ch2 = GET ();
631 if (ch2 == '*')
632 {
633 state = -2;
634 return (do_scrub_next_char (get, unget));
635 }
636 else
637 {
638 (*unget) (ch2);
639 }
640 } /* bad hack */
641
642 if (ch != '#')
643 not_cpp_line = 1;
644
645 do
646 ch = GET ();
647 while (ch != EOF && IS_WHITESPACE (ch));
648 if (ch == EOF)
649 {
650 as_warn ("EOF in comment: Newline inserted");
651 return '\n';
652 }
653 if (ch < '0' || ch > '9' || not_cpp_line)
654 {
655 /* Non-numerics: Eat whole comment line */
656 while (ch != EOF && !IS_NEWLINE (ch))
657 ch = GET ();
658 if (ch == EOF)
659 as_warn ("EOF in Comment: Newline inserted");
660 state = 0;
661 return '\n';
662 }
663 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
664 (*unget) (ch);
665 old_state = 4;
666 state = -1;
667 out_string = "\t.appline ";
668 return *out_string++;
669 }
670
671 /* We have a line comment character which is not at the start of
672 a line. If this is also a normal comment character, fall
673 through. Otherwise treat it as a default character. */
674 if (strchr (comment_chars, ch) == NULL)
675 goto de_fault;
676 /* Fall through. */
677 case LEX_IS_COMMENT_START:
678 do
679 ch = GET ();
680 while (ch != EOF && !IS_NEWLINE (ch));
681 if (ch == EOF)
682 as_warn ("EOF in comment: Newline inserted");
683 state = 0;
684 return '\n';
685
686 case LEX_IS_SYMBOL_COMPONENT:
687 if (state == 10)
688 {
689 /* This is a symbol character following another symbol
690 character, with whitespace in between. We skipped the
691 whitespace earlier, so output it now. */
692 (*unget) (ch);
693 state = 3;
694 return ' ';
695 }
696 if (state == 3)
697 state = 9;
698 /* Fall through. */
699 default:
700 de_fault:
701 /* Some relatively `normal' character. */
702 if (state == 0)
703 {
704 state = 11; /* Now seeing label definition */
705 return ch;
706 }
707 else if (state == 1)
708 {
709 state = 2; /* Ditto */
710 return ch;
711 }
712 else if (state == 9)
713 {
714 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
715 state = 3;
716 return ch;
717 }
718 else if (state == 10)
719 {
720 state = 3;
721 return ch;
722 }
723 else
724 {
725 return ch; /* Opcode or operands already */
726 }
727 }
728 return -1;
729
730 #undef GET
731 }
732
#ifdef TEST

/* Stand-alone driver, built only when TEST is defined: scrub standard
   input and write the result to standard output.  */

/* Character sets that do_scrub_begin () reads.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  do_scrub_begin ();

  /* Read through the file-based callbacks, with stdin as the source.  */
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);

  return 0;
}

/* Stand-in warning routine: print STR and a newline on stderr.  STR is
   printed verbatim; format directives in it are not expanded.
   NOTE(review): the return type stays int, as in the earlier implicit-int
   definition -- confirm that it agrees with whatever as.h declares.  */
int
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
  return 0;
}

#endif
755
756 /* end of app.c */
This page took 0.043892 seconds and 4 git commands to generate.