* config/obj-bout.h (S_GET_VALUE): Removed unnecessary cast.
[deliverable/binutils-gdb.git] / gas / app.c
1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
23
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
27 pair. This needs better error-handling.
28 */
29
30 #include <stdio.h>
31 #include "as.h" /* For BAD_CASE() only */
32
33 #if (__STDC__ != 1) && !defined(const)
34 #define const /* Nothing */
35 #endif
36
/* Character-class table, indexed by character value (0..255).  An entry
   of zero means "ordinary character"; any other value is one of the
   LEX_IS_* codes below.  The table is filled in by do_scrub_begin ().  */
static char lex[256];

/* Characters that may appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Classification predicates.  The argument is parenthesized and reduced
   to an unsigned char before it is used as an index, so that a plain
   `char' holding a character with the high bit set cannot index in front
   of the table on hosts where `char' is signed.  Callers must still test
   for EOF themselves before using these.  */
#define IS_SYMBOL_COMPONENT(c)	(lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
58
59 /* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
62
63 void
64 do_scrub_begin ()
65 {
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex['"'] = LEX_IS_STRINGQUOTE;
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
74 lex[':'] = LEX_IS_COLON;
75
76
77
78 #ifdef SINGLE_QUOTE_STRINGS
79 lex['\''] = LEX_IS_STRINGQUOTE;
80 #endif
81
82 /* Note that these override the previous defaults, e.g. if ';'
83
84 is a comment char, then it isn't a line separator. */
85 for (p = symbol_chars; *p; ++p)
86 {
87 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
88 } /* declare symbol characters */
89
90 for (p = line_comment_chars; *p; p++)
91 {
92 lex[*p] = LEX_IS_LINE_COMMENT_START;
93 } /* declare line comment chars */
94
95 for (p = comment_chars; *p; p++)
96 {
97 lex[*p] = LEX_IS_COMMENT_START;
98 } /* declare comment chars */
99
100 for (p = line_separator_chars; *p; p++)
101 {
102 lex[*p] = LEX_IS_LINE_SEPARATOR;
103 } /* declare line separators */
104
105 /* Only allow slash-star comments if slash is not in use */
106 if (lex['/'] == 0)
107 {
108 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
109 }
110 /* FIXME-soon. This is a bad hack but otherwise, we
111 can't do c-style comments when '/' is a line
112 comment char. xoxorich. */
113 if (lex['*'] == 0)
114 {
115 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
116 }
117 } /* do_scrub_begin() */
118
/* Stream the scrubber reads from when its input is a file.  */
FILE *scrub_file;

/* "get" callback for file input: fetch the next character from
   scrub_file, or EOF when getc reports end of input.  */
int
scrub_from_file ()
{
  int next;

  next = getc (scrub_file);
  return next;
}
126
127 void
128 scrub_to_file (ch)
129 int ch;
130 {
131 ungetc (ch, scrub_file);
132 } /* scrub_to_file() */
133
/* In-memory input for the scrubber: scrub_string points at the next
   character to deliver and scrub_last_string points one past the last
   character of the input.  */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for string input.  Returns the next character as a
   non-negative value (like getc does), or EOF once scrub_string has
   reached scrub_last_string.

   The character is read as an unsigned char: returning a plain `char'
   would sign-extend on hosts where `char' is signed, so the byte 0xFF
   would be indistinguishable from EOF and other high-bit bytes would
   become negative indices into the lex[] table.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;
  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* "unget" callback for string input: step scrub_string back by one and
   store CH there, so that the next scrub_from_string () call returns it.

   Pushing back EOF is ignored, matching what ungetc does for files.
   Storing it would overwrite a real input character with the byte 0xFF,
   which scrub_from_string () would then deliver as character 255.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;
  *--scrub_string = ch;
}				/* scrub_to_string() */
149
/* Saved state of the scrubber.  These persist between calls to
   do_scrub_next_char (), which delivers one character per call.  */

/* Current state of the scrubber's state machine, and the state to
   switch to once the current sub-task (string, canned output, C
   comment) is finished.  */
static int state, old_state;

/* Holds the decimal text produced for a one-character quote.  */
static char out_buf[20];

/* Next character of the canned text being emitted in state -1.  */
static char *out_string;

/* Count of newlines consumed inside comments and continued strings that
   still have to be handed back to the caller.  */
static int add_newlines = 0;

/* Everything needed to suspend the scrubber and resume it later.  App
   is called asynchronously to the parsing of the .include's, so the
   scrubber's state at the moment a .include is interpreted has nothing
   to do with the .include itself; that is why all of it is saved.  */

struct app_save
{
  /* Copies of the file-scope scrubber variables of the same names.  */
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;

  /* Copies of the current input source.  */
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
173
174 char *
175 app_push ()
176 {
177 register struct app_save *saved;
178
179 saved = (struct app_save *) xmalloc (sizeof (*saved));
180 saved->state = state;
181 saved->old_state = old_state;
182 saved->out_string = out_string;
183 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
184 saved->add_newlines = add_newlines;
185 saved->scrub_string = scrub_string;
186 saved->scrub_last_string = scrub_last_string;
187 saved->scrub_file = scrub_file;
188
189 /* do_scrub_begin() is not useful, just wastes time. */
190 return (char *) saved;
191 }
192
193 void
194 app_pop (arg)
195 char *arg;
196 {
197 register struct app_save *saved = (struct app_save *) arg;
198
199 /* There is no do_scrub_end (). */
200 state = saved->state;
201 old_state = saved->old_state;
202 out_string = saved->out_string;
203 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
204 add_newlines = saved->add_newlines;
205 scrub_string = saved->scrub_string;
206 scrub_last_string = saved->scrub_last_string;
207 scrub_file = saved->scrub_file;
208
209 free (arg);
210 } /* app_pop() */
211
/* Translate the character that follows a backslash into the character
   the escape sequence stands for.  The letters b, f, n, r and t map to
   the corresponding control characters; every other character
   (including both quote characters) stands for itself.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int
process_escape (ch)
     char ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';
  if (ch == '\'')
    return '\'';
  if (ch == '"')
    return '\"';

  /* Anything else is taken literally.  */
  return ch;
}
238 int
239 do_scrub_next_char (get, unget)
240 int (*get) ();
241 void (*unget) ();
242 {
243 /*State 0: beginning of normal line
244 1: After first whitespace on line (flush more white)
245 2: After first non-white (opcode) on line (keep 1white)
246 3: after second white on line (into operands) (flush white)
247 4: after putting out a .line, put out digits
248 5: parsing a string, then go to old-state
249 6: putting out \ escape in a "d string.
250 7: After putting out a .app-file, put out string.
251 8: After putting out a .app-file string, flush until newline.
252 9: After seeing symbol char in state 3 (keep 1white after symchar)
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
255 */
256
257 /* I added state 9 because the MIPS ECOFF assembler uses constructs
258 like ``.loc 1 20''. This was turning into ``.loc 120''. State 9
259 ensures that a space is never dropped immediately following a
260 character which could appear in a identifier. It is still
261 dropped following a comma, so this has no effect for most
262 assemblers. I hope. Ian Taylor, ian@cygnus.com. */
263
264 register int ch, ch2 = 0;
265
266 switch (state)
267 {
268 case -1:
269 ch = *out_string++;
270 if (*out_string == 0)
271 {
272 state = old_state;
273 old_state = 3;
274 }
275 return ch;
276
277 case -2:
278 for (;;)
279 {
280 do
281 {
282 ch = (*get) ();
283 }
284 while (ch != EOF && ch != '\n' && ch != '*');
285 if (ch == '\n' || ch == EOF)
286 return ch;
287
288 /* At this point, ch must be a '*' */
289 while ((ch = (*get) ()) == '*')
290 {
291 ;
292 }
293 if (ch == EOF || ch == '/')
294 break;
295 (*unget) (ch);
296 }
297 state = old_state;
298 return ' ';
299
300 case 4:
301 ch = (*get) ();
302 if (ch == EOF || (ch >= '0' && ch <= '9'))
303 return ch;
304 else
305 {
306 while (ch != EOF && IS_WHITESPACE (ch))
307 ch = (*get) ();
308 if (ch == '"')
309 {
310 (*unget) (ch);
311 out_string = "\n.app-file ";
312 old_state = 7;
313 state = -1;
314 return *out_string++;
315 }
316 else
317 {
318 while (ch != EOF && ch != '\n')
319 ch = (*get) ();
320 return ch;
321 }
322 }
323
324 case 5:
325 ch = (*get) ();
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
327 {
328 state = old_state;
329 return ch;
330 }
331 else if (ch == '\\')
332 {
333 state = 6;
334 return ch;
335 }
336 else if (ch == EOF)
337 {
338 as_warn ("End of file in string: inserted '\"'");
339 state = old_state;
340 (*unget) ('\n');
341 return '"';
342 }
343 else
344 {
345 return ch;
346 }
347
348 case 6:
349 state = 5;
350 ch = (*get) ();
351 switch (ch)
352 {
353 /* Handle strings broken across lines, by turning '\n' into
354 '\\' and 'n'. */
355 case '\n':
356 (*unget) ('n');
357 add_newlines++;
358 return '\\';
359
360 case '"':
361 case '\\':
362 case 'b':
363 case 'f':
364 case 'n':
365 case 'r':
366 case 't':
367 #ifdef BACKSLASH_V
368 case 'v':
369 #endif /* BACKSLASH_V */
370 case '0':
371 case '1':
372 case '2':
373 case '3':
374 case '4':
375 case '5':
376 case '6':
377 case '7':
378 break;
379 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
380 default:
381 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
382 break;
383 #else /* ONLY_STANDARD_ESCAPES */
384 default:
385 /* Accept \x as x for any x */
386 break;
387 #endif /* ONLY_STANDARD_ESCAPES */
388
389 case EOF:
390 as_warn ("End of file in string: '\"' inserted");
391 return '"';
392 }
393 return ch;
394
395 case 7:
396 ch = (*get) ();
397 state = 5;
398 old_state = 8;
399 return ch;
400
401 case 8:
402 do
403 ch = (*get) ();
404 while (ch != '\n');
405 state = 0;
406 return ch;
407 }
408
409 /* OK, we are somewhere in states 0 through 4 or 9 */
410
411 /* flushchar: */
412 ch = (*get) ();
413 recycle:
414 if (ch == EOF)
415 {
416 if (state != 0)
417 as_warn ("End of file not at end of a line: Newline inserted.");
418 return ch;
419 }
420
421 switch (lex[ch])
422 {
423 case LEX_IS_WHITESPACE:
424 do
425 ch = (*get) ();
426 while (ch != EOF && IS_WHITESPACE (ch));
427 if (ch == EOF)
428 return ch;
429
430 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
431 {
432 goto recycle;
433 }
434 #ifdef MRI
435 (*unget) (ch); /* Put back */
436 return ' '; /* Always return one space at start of line */
437 #endif
438
439 /* If we're in state 2, we've seen a non-white
440 character followed by whitespace. If the next
441 character is ':', this is whitespace after a label
442 name which we can ignore. */
443 if (state == 2 && lex[ch] == LEX_IS_COLON)
444 {
445 state = 0;
446 return ch;
447 }
448
449 switch (state)
450 {
451 case 0:
452 state++;
453 goto recycle; /* Punted leading sp */
454 case 1:
455 BAD_CASE (state); /* We can't get here */
456 case 2:
457 case 9:
458 state = 3;
459 (*unget) (ch);
460 return ' '; /* Sp after opco */
461 case 3:
462 goto recycle; /* Sp in operands */
463 default:
464 BAD_CASE (state);
465 }
466 break;
467
468 case LEX_IS_TWOCHAR_COMMENT_1ST:
469 ch2 = (*get) ();
470 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
471 {
472 for (;;)
473 {
474 do
475 {
476 ch2 = (*get) ();
477 if (ch2 != EOF && IS_NEWLINE (ch2))
478 add_newlines++;
479 }
480 while (ch2 != EOF &&
481 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
482
483 while (ch2 != EOF &&
484 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
485 {
486 ch2 = (*get) ();
487 }
488
489 if (ch2 == EOF
490 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
491 break;
492 (*unget) (ch);
493 }
494 if (ch2 == EOF)
495 as_warn ("End of file in multiline comment");
496
497 ch = ' ';
498 goto recycle;
499 }
500 else
501 {
502 if (ch2 != EOF)
503 (*unget) (ch2);
504 return ch;
505 }
506 break;
507
508 case LEX_IS_STRINGQUOTE:
509 old_state = state;
510 state = 5;
511 return ch;
512 #ifndef MRI
513 #ifndef IEEE_STYLE
514 case LEX_IS_ONECHAR_QUOTE:
515 ch = (*get) ();
516 if (ch == EOF)
517 {
518 as_warn ("End-of-file after a one-character quote; \\000 inserted");
519 ch = 0;
520 }
521 if (ch == '\\')
522 {
523 ch = (*get) ();
524 ch = process_escape (ch);
525 }
526 sprintf (out_buf, "%d", (int) (unsigned char) ch);
527
528
529 /* None of these 'x constants for us. We want 'x'.
530 */
531 if ((ch = (*get) ()) != '\'')
532 {
533 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
534 as_warn ("Missing close quote: (assumed)");
535 #else
536 (*unget) (ch);
537 #endif
538 }
539 if (strlen (out_buf) == 1)
540 {
541 return out_buf[0];
542 }
543 old_state = state;
544 state = -1;
545 out_string = out_buf;
546 return *out_string++;
547 #endif
548 #endif
549 case LEX_IS_COLON:
550 if (state != 3)
551 state = 0;
552 return ch;
553
554 case LEX_IS_NEWLINE:
555 /* Roll out a bunch of newlines from inside comments, etc. */
556 if (add_newlines)
557 {
558 --add_newlines;
559 (*unget) (ch);
560 }
561 /* fall thru into... */
562
563 case LEX_IS_LINE_SEPARATOR:
564 state = 0;
565 return ch;
566
567 case LEX_IS_LINE_COMMENT_START:
568 if (state != 0) /* Not at start of line, act normal */
569 goto de_fault;
570
571 /* FIXME-someday: The two character comment stuff was badly
572 thought out. On i386, we want '/' as line comment start AND
573 we want C style comments. hence this hack. The whole
574 lexical process should be reworked. xoxorich. */
575
576 if (ch == '/')
577 {
578 ch2 = (*get) ();
579 if (ch2 == '*')
580 {
581 state = -2;
582 return (do_scrub_next_char (get, unget));
583 }
584 else
585 {
586 (*unget) (ch2);
587 }
588 } /* bad hack */
589
590 do
591 ch = (*get) ();
592 while (ch != EOF && IS_WHITESPACE (ch));
593 if (ch == EOF)
594 {
595 as_warn ("EOF in comment: Newline inserted");
596 return '\n';
597 }
598 if (ch < '0' || ch > '9')
599 {
600 /* Non-numerics: Eat whole comment line */
601 while (ch != EOF && !IS_NEWLINE (ch))
602 ch = (*get) ();
603 if (ch == EOF)
604 as_warn ("EOF in Comment: Newline inserted");
605 state = 0;
606 return '\n';
607 }
608 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
609 (*unget) (ch);
610 old_state = 4;
611 state = -1;
612 out_string = ".line ";
613 return *out_string++;
614
615 case LEX_IS_COMMENT_START:
616 do
617 ch = (*get) ();
618 while (ch != EOF && !IS_NEWLINE (ch));
619 if (ch == EOF)
620 as_warn ("EOF in comment: Newline inserted");
621 state = 0;
622 return '\n';
623
624 case LEX_IS_SYMBOL_COMPONENT:
625 if (state == 3)
626 state = 9;
627 /* Fall through. */
628 default:
629 de_fault:
630 /* Some relatively `normal' character. */
631 if (state == 0)
632 {
633 state = 2; /* Now seeing opcode */
634 return ch;
635 }
636 else if (state == 1)
637 {
638 state = 2; /* Ditto */
639 return ch;
640 }
641 else if (state == 9)
642 {
643 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
644 state = 3;
645 return ch;
646 }
647 else
648 {
649 return ch; /* Opcode or operands already */
650 }
651 }
652 return -1;
653 }
654
655 #ifdef TEST
656
657 const char comment_chars[] = "|";
658 const char line_comment_chars[] = "#";
659
660 main ()
661 {
662 int ch;
663
664 app_begin ();
665 while ((ch = do_scrub_next_char (stdin)) != EOF)
666 putc (ch, stdout);
667 }
668
/* Minimal stand-in for the assembler's warning routine, used only in the
   stand-alone test build: write STR and a newline to standard error.
   STR is written verbatim; it is not treated as a format string.  */
as_warn (str)
     char *str;
{
  fprintf (stderr, "%s\n", str);
}
675
676 #endif
677
678 /*
679 * Local Variables:
680 * comment-column: 0
681 * fill-column: 131
682 * End:
683 */
684
685 /* end of app.c */
This page took 0.042447 seconds and 4 git commands to generate.