01fea74287e38ab443b66023164fc0bbdd2049ff
[deliverable/binutils-gdb.git] / gas / app.c
1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
23
/* App, the assembler pre-processor. This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair. This needs better error-handling.
 */
29
30 #include <stdio.h>
31 #include "as.h" /* For BAD_CASE() only */
32
33 #if (__STDC__ != 1) && !defined(const)
34 #define const /* Nothing */
35 #endif
36
/* Character classification table, filled in by do_scrub_begin ().
   An entry of 0 means "ordinary character"; otherwise it holds one of
   the LEX_IS_* codes below.  */
static char lex[256];

/* Characters that may appear inside a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_TWOCHAR_COMMENT_2ND      7
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11

/* Classification predicates.  The argument is reduced to an unsigned
   char before it is used as an index, so a negative value (a signed
   char with the high bit set, or EOF) can never address memory outside
   lex[].  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
58
59 /* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
62
63 void
64 do_scrub_begin ()
65 {
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex['"'] = LEX_IS_STRINGQUOTE;
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
74 lex[':'] = LEX_IS_COLON;
75
76
77
78 #ifdef SINGLE_QUOTE_STRINGS
79 lex['\''] = LEX_IS_STRINGQUOTE;
80 #endif
81
82 /* Note that these override the previous defaults, e.g. if ';'
83
84 is a comment char, then it isn't a line separator. */
85 for (p = symbol_chars; *p; ++p)
86 {
87 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
88 } /* declare symbol characters */
89
90 for (p = comment_chars; *p; p++)
91 {
92 lex[*p] = LEX_IS_COMMENT_START;
93 } /* declare comment chars */
94
95 for (p = line_comment_chars; *p; p++)
96 {
97 lex[*p] = LEX_IS_LINE_COMMENT_START;
98 } /* declare line comment chars */
99
100 for (p = line_separator_chars; *p; p++)
101 {
102 lex[*p] = LEX_IS_LINE_SEPARATOR;
103 } /* declare line separators */
104
105 /* Only allow slash-star comments if slash is not in use */
106 if (lex['/'] == 0)
107 {
108 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
109 }
110 /* FIXME-soon. This is a bad hack but otherwise, we
111 can't do c-style comments when '/' is a line
112 comment char. xoxorich. */
113 if (lex['*'] == 0)
114 {
115 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
116 }
117 } /* do_scrub_begin() */
118
/* Stream used by the file-based get/unget callbacks.  */
FILE *scrub_file;

/* "get" callback for do_scrub_next_char (): fetch the next character
   from scrub_file, or EOF.  */
int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}
126
127 void
128 scrub_to_file (ch)
129 int ch;
130 {
131 ungetc (ch, scrub_file);
132 } /* scrub_to_file() */
133
/* In-memory input for the string-based get/unget callbacks:
   scrub_string is the next character to hand out, scrub_last_string
   is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for do_scrub_next_char (): return the next character
   of the in-memory input, or EOF once scrub_string has reached
   scrub_last_string.

   The character is returned as an unsigned char value, the same
   convention getc () uses.  Otherwise, on hosts where plain char is
   signed, a 0xFF byte would be indistinguishable from EOF and other
   high-bit bytes would come back negative.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}                               /* scrub_from_string() */
142
143 void
144 scrub_to_string (ch)
145 int ch;
146 {
147 *--scrub_string = ch;
148 } /* scrub_to_string() */
149
/* Scrubber state carried from one do_scrub_next_char () call to the
   next.  */
static int state, old_state;    /* current state; state to resume later */
static char *out_string;        /* text still to be emitted in state -1 */
static char out_buf[20];        /* holds the decimal form of a 'c' constant */
static int add_newlines;        /* newlines swallowed, to be re-emitted */
156
157 /* Data structure for saving the state of app across #include's. Note that
158 app is called asynchronously to the parsing of the .include's, so our
159 state at the time .include is interpreted is completely unrelated.
160 That's why we have to save it all. */
161
162 struct app_save
163 {
164 int state;
165 int old_state;
166 char *out_string;
167 char out_buf[sizeof (out_buf)];
168 int add_newlines;
169 char *scrub_string;
170 char *scrub_last_string;
171 FILE *scrub_file;
172 };
173
174 char *
175 app_push ()
176 {
177 register struct app_save *saved;
178
179 saved = (struct app_save *) xmalloc (sizeof (*saved));
180 saved->state = state;
181 saved->old_state = old_state;
182 saved->out_string = out_string;
183 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
184 saved->add_newlines = add_newlines;
185 saved->scrub_string = scrub_string;
186 saved->scrub_last_string = scrub_last_string;
187 saved->scrub_file = scrub_file;
188
189 /* do_scrub_begin() is not useful, just wastes time. */
190 return (char *) saved;
191 }
192
193 void
194 app_pop (arg)
195 char *arg;
196 {
197 register struct app_save *saved = (struct app_save *) arg;
198
199 /* There is no do_scrub_end (). */
200 state = saved->state;
201 old_state = saved->old_state;
202 out_string = saved->out_string;
203 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
204 add_newlines = saved->add_newlines;
205 scrub_string = saved->scrub_string;
206 scrub_last_string = saved->scrub_last_string;
207 scrub_file = saved->scrub_file;
208
209 free (arg);
210 } /* app_pop() */
211
/* Translate the character following a backslash into the character the
   escape stands for: b, f, n, r and t map to the corresponding control
   characters; every other character (the quote characters included)
   stands for itself.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int
process_escape (ch)
     char ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* '\'', '"' and anything unrecognised are passed through unchanged.  */
  return ch;
}
238 int
239 do_scrub_next_char (get, unget)
240 int (*get) ();
241 void (*unget) ();
242 {
243 /*State 0: beginning of normal line
244 1: After first whitespace on line (flush more white)
245 2: After first non-white (opcode) on line (keep 1white)
246 3: after second white on line (into operands) (flush white)
247 4: after putting out a .line, put out digits
248 5: parsing a string, then go to old-state
249 6: putting out \ escape in a "d string.
250 7: After putting out a .appfile, put out string.
251 8: After putting out a .appfile string, flush until newline.
252 9: After seeing symbol char in state 3 (keep 1white after symchar)
253 10: After seeing whitespace in state 9 (keep white before symchar)
254 -1: output string in out_string and go to the state in old_state
255 -2: flush text until a '*' '/' is seen, then go to state old_state
256 */
257
258 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
259 constructs like ``.loc 1 20''. This was turning into ``.loc
260 120''. States 9 and 10 ensure that a space is never dropped in
261 between characters which could appear in a identifier. Ian
262 Taylor, ian@cygnus.com. */
263
264 register int ch, ch2 = 0;
265
266 switch (state)
267 {
268 case -1:
269 ch = *out_string++;
270 if (*out_string == 0)
271 {
272 state = old_state;
273 old_state = 3;
274 }
275 return ch;
276
277 case -2:
278 for (;;)
279 {
280 do
281 {
282 ch = (*get) ();
283 }
284 while (ch != EOF && ch != '\n' && ch != '*');
285 if (ch == '\n' || ch == EOF)
286 return ch;
287
288 /* At this point, ch must be a '*' */
289 while ((ch = (*get) ()) == '*')
290 {
291 ;
292 }
293 if (ch == EOF || ch == '/')
294 break;
295 (*unget) (ch);
296 }
297 state = old_state;
298 return ' ';
299
300 case 4:
301 ch = (*get) ();
302 if (ch == EOF || (ch >= '0' && ch <= '9'))
303 return ch;
304 else
305 {
306 while (ch != EOF && IS_WHITESPACE (ch))
307 ch = (*get) ();
308 if (ch == '"')
309 {
310 (*unget) (ch);
311 out_string = "\n.appfile ";
312 old_state = 7;
313 state = -1;
314 return *out_string++;
315 }
316 else
317 {
318 while (ch != EOF && ch != '\n')
319 ch = (*get) ();
320 return ch;
321 }
322 }
323
324 case 5:
325 ch = (*get) ();
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
327 {
328 state = old_state;
329 return ch;
330 }
331 else if (ch == '\\')
332 {
333 state = 6;
334 return ch;
335 }
336 else if (ch == EOF)
337 {
338 as_warn ("End of file in string: inserted '\"'");
339 state = old_state;
340 (*unget) ('\n');
341 return '"';
342 }
343 else
344 {
345 return ch;
346 }
347
348 case 6:
349 state = 5;
350 ch = (*get) ();
351 switch (ch)
352 {
353 /* Handle strings broken across lines, by turning '\n' into
354 '\\' and 'n'. */
355 case '\n':
356 (*unget) ('n');
357 add_newlines++;
358 return '\\';
359
360 case '"':
361 case '\\':
362 case 'b':
363 case 'f':
364 case 'n':
365 case 'r':
366 case 't':
367 #ifdef BACKSLASH_V
368 case 'v':
369 #endif /* BACKSLASH_V */
370 case '0':
371 case '1':
372 case '2':
373 case '3':
374 case '4':
375 case '5':
376 case '6':
377 case '7':
378 break;
379 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
380 default:
381 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
382 break;
383 #else /* ONLY_STANDARD_ESCAPES */
384 default:
385 /* Accept \x as x for any x */
386 break;
387 #endif /* ONLY_STANDARD_ESCAPES */
388
389 case EOF:
390 as_warn ("End of file in string: '\"' inserted");
391 return '"';
392 }
393 return ch;
394
395 case 7:
396 ch = (*get) ();
397 state = 5;
398 old_state = 8;
399 return ch;
400
401 case 8:
402 do
403 ch = (*get) ();
404 while (ch != '\n');
405 state = 0;
406 return ch;
407 }
408
409 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
410
411 /* flushchar: */
412 ch = (*get) ();
413 recycle:
414 if (ch == EOF)
415 {
416 if (state != 0)
417 as_warn ("End of file not at end of a line: Newline inserted.");
418 return ch;
419 }
420
421 switch (lex[ch])
422 {
423 case LEX_IS_WHITESPACE:
424 do
425 ch = (*get) ();
426 while (ch != EOF && IS_WHITESPACE (ch));
427 if (ch == EOF)
428 return ch;
429
430 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
431 {
432 goto recycle;
433 }
434 #ifdef MRI
435 (*unget) (ch); /* Put back */
436 return ' '; /* Always return one space at start of line */
437 #endif
438
439 /* If we're in state 2, we've seen a non-white
440 character followed by whitespace. If the next
441 character is ':', this is whitespace after a label
442 name which we can ignore. */
443 if (state == 2 && lex[ch] == LEX_IS_COLON)
444 {
445 state = 0;
446 return ch;
447 }
448
449 switch (state)
450 {
451 case 0:
452 state++;
453 goto recycle; /* Punted leading sp */
454 case 1:
455 BAD_CASE (state); /* We can't get here */
456 case 2:
457 state = 3;
458 (*unget) (ch);
459 return ' '; /* Sp after opco */
460 case 3:
461 goto recycle; /* Sp in operands */
462 case 9:
463 case 10:
464 state = 10; /* Sp after symbol char */
465 goto recycle;
466 default:
467 BAD_CASE (state);
468 }
469 break;
470
471 case LEX_IS_TWOCHAR_COMMENT_1ST:
472 ch2 = (*get) ();
473 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
474 {
475 for (;;)
476 {
477 do
478 {
479 ch2 = (*get) ();
480 if (ch2 != EOF && IS_NEWLINE (ch2))
481 add_newlines++;
482 }
483 while (ch2 != EOF &&
484 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
485
486 while (ch2 != EOF &&
487 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
488 {
489 ch2 = (*get) ();
490 }
491
492 if (ch2 == EOF
493 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
494 break;
495 (*unget) (ch);
496 }
497 if (ch2 == EOF)
498 as_warn ("End of file in multiline comment");
499
500 ch = ' ';
501 goto recycle;
502 }
503 else
504 {
505 if (ch2 != EOF)
506 (*unget) (ch2);
507 if (state == 9 || state == 10)
508 state = 3;
509 return ch;
510 }
511 break;
512
513 case LEX_IS_STRINGQUOTE:
514 if (state == 9 || state == 10)
515 old_state = 3;
516 else
517 old_state = state;
518 state = 5;
519 return ch;
520 #ifndef MRI
521 #ifndef IEEE_STYLE
522 case LEX_IS_ONECHAR_QUOTE:
523 ch = (*get) ();
524 if (ch == EOF)
525 {
526 as_warn ("End-of-file after a one-character quote; \\000 inserted");
527 ch = 0;
528 }
529 if (ch == '\\')
530 {
531 ch = (*get) ();
532 ch = process_escape (ch);
533 }
534 sprintf (out_buf, "%d", (int) (unsigned char) ch);
535
536
537 /* None of these 'x constants for us. We want 'x'. */
538 if ((ch = (*get) ()) != '\'')
539 {
540 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
541 as_warn ("Missing close quote: (assumed)");
542 #else
543 (*unget) (ch);
544 #endif
545 }
546 if (strlen (out_buf) == 1)
547 {
548 return out_buf[0];
549 }
550 if (state == 9 || state == 10)
551 old_state = 3;
552 else
553 old_state = state;
554 state = -1;
555 out_string = out_buf;
556 return *out_string++;
557 #endif
558 #endif
559 case LEX_IS_COLON:
560 if (state == 9 || state == 10)
561 state = 3;
562 else if (state != 3)
563 state = 0;
564 return ch;
565
566 case LEX_IS_NEWLINE:
567 /* Roll out a bunch of newlines from inside comments, etc. */
568 if (add_newlines)
569 {
570 --add_newlines;
571 (*unget) (ch);
572 }
573 /* fall thru into... */
574
575 case LEX_IS_LINE_SEPARATOR:
576 state = 0;
577 return ch;
578
579 case LEX_IS_LINE_COMMENT_START:
580 if (state == 0) /* Only comment at start of line. */
581 {
582 /* FIXME-someday: The two character comment stuff was badly
583 thought out. On i386, we want '/' as line comment start
584 AND we want C style comments. hence this hack. The
585 whole lexical process should be reworked. xoxorich. */
586 if (ch == '/')
587 {
588 ch2 = (*get) ();
589 if (ch2 == '*')
590 {
591 state = -2;
592 return (do_scrub_next_char (get, unget));
593 }
594 else
595 {
596 (*unget) (ch2);
597 }
598 } /* bad hack */
599
600 do
601 ch = (*get) ();
602 while (ch != EOF && IS_WHITESPACE (ch));
603 if (ch == EOF)
604 {
605 as_warn ("EOF in comment: Newline inserted");
606 return '\n';
607 }
608 if (ch < '0' || ch > '9')
609 {
610 /* Non-numerics: Eat whole comment line */
611 while (ch != EOF && !IS_NEWLINE (ch))
612 ch = (*get) ();
613 if (ch == EOF)
614 as_warn ("EOF in Comment: Newline inserted");
615 state = 0;
616 return '\n';
617 }
618 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
619 (*unget) (ch);
620 old_state = 4;
621 state = -1;
622 out_string = ".appline ";
623 return *out_string++;
624 }
625
626 /* We have a line comment character which is not at the start of
627 a line. If this is also a normal comment character, fall
628 through. Otherwise treat it as a default character. */
629 if (strchr (comment_chars, ch) == NULL)
630 goto de_fault;
631 /* Fall through. */
632 case LEX_IS_COMMENT_START:
633 do
634 ch = (*get) ();
635 while (ch != EOF && !IS_NEWLINE (ch));
636 if (ch == EOF)
637 as_warn ("EOF in comment: Newline inserted");
638 state = 0;
639 return '\n';
640
641 case LEX_IS_SYMBOL_COMPONENT:
642 if (state == 10)
643 {
644 /* This is a symbol character following another symbol
645 character, with whitespace in between. We skipped the
646 whitespace earlier, so output it now. */
647 (*unget) (ch);
648 state = 3;
649 return ' ';
650 }
651 if (state == 3)
652 state = 9;
653 /* Fall through. */
654 default:
655 de_fault:
656 /* Some relatively `normal' character. */
657 if (state == 0)
658 {
659 state = 2; /* Now seeing opcode */
660 return ch;
661 }
662 else if (state == 1)
663 {
664 state = 2; /* Ditto */
665 return ch;
666 }
667 else if (state == 9)
668 {
669 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
670 state = 3;
671 return ch;
672 }
673 else if (state == 10)
674 {
675 state = 3;
676 return ch;
677 }
678 else
679 {
680 return ch; /* Opcode or operands already */
681 }
682 }
683 return -1;
684 }
685
#ifdef TEST

/* Stand-alone filter for trying out the scrubber: copies stdin to
   stdout through do_scrub_next_char ().  Only compiled when TEST is
   defined.
   NOTE(review): a TEST build may need further stubs for names that
   normally come from the rest of the assembler (e.g. xmalloc, and
   whatever BAD_CASE expands to) -- confirm against as.h.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also reads this table; no extra separators here.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal replacement for the assembler's warning routine: prints STR
   followed by a newline on stderr.  STR is printed literally, so any
   printf-style conversions in it are not expanded.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
708
709 /* end of app.c */
This page took 0.048453 seconds and 4 git commands to generate.