Tue Dec 29 15:06:00 1992 Ian Lance Taylor (ian@cygnus.com)
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
3340f7e5 1/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
6efd877d 2
a39116f1
RP
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
fecd2382
RP
5/* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
6efd877d 7
a39116f1 8 This file is part of GAS, the GNU Assembler.
6efd877d 9
a39116f1
RP
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
6efd877d 14
a39116f1
RP
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
6efd877d 19
a39116f1
RP
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382
RP
23
24/* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
be06bdcd
SC
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
27 pair. This needs better error-handling.
a39116f1 28 */
fecd2382
RP
29
30#include <stdio.h>
6efd877d
KR
31#include "as.h" /* For BAD_CASE() only */
32#include "read.h"
fecd2382 33
3340f7e5 34#if (__STDC__ != 1) && !defined(const)
6efd877d 35#define const /* Nothing */
fecd2382
RP
36#endif
37
6efd877d
KR
/* Character-class table used by the scrubber.  lex[c] holds one of the
   LEX_IS_* codes defined below, or 0 when character c has no class
   assigned.  The table is filled in by do_scrub_begin ().  */
static char lex[256];

/* Every character that is classified as LEX_IS_SYMBOL_COMPONENT.  */
static char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be in the range 0..255 (in particular, not EOF).  */
#define IS_SYMBOL_COMPONENT(c)	(lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[(c)] == LEX_IS_NEWLINE)
59
60/* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
fecd2382 63
6efd877d
KR
64void
65do_scrub_begin ()
66{
67 const char *p;
68
69 lex[' '] = LEX_IS_WHITESPACE;
70 lex['\t'] = LEX_IS_WHITESPACE;
71 lex['\n'] = LEX_IS_NEWLINE;
72 lex[';'] = LEX_IS_LINE_SEPARATOR;
73 lex['"'] = LEX_IS_STRINGQUOTE;
74 lex['\''] = LEX_IS_ONECHAR_QUOTE;
75 lex[':'] = LEX_IS_COLON;
7c2d4011 76
be06bdcd
SC
77
78
79#ifdef SINGLE_QUOTE_STRINGS
80 lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011 81#endif
be06bdcd 82
6efd877d 83 /* Note that these override the previous defaults, e.g. if ';'
be06bdcd 84
fecd2382 85 is a comment char, then it isn't a line separator. */
6efd877d
KR
86 for (p = symbol_chars; *p; ++p)
87 {
88 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
89 } /* declare symbol characters */
90
91 for (p = line_comment_chars; *p; p++)
92 {
93 lex[*p] = LEX_IS_LINE_COMMENT_START;
94 } /* declare line comment chars */
95
96 for (p = comment_chars; *p; p++)
97 {
98 lex[*p] = LEX_IS_COMMENT_START;
99 } /* declare comment chars */
100
101 for (p = line_separator_chars; *p; p++)
102 {
103 lex[*p] = LEX_IS_LINE_SEPARATOR;
104 } /* declare line separators */
105
106 /* Only allow slash-star comments if slash is not in use */
107 if (lex['/'] == 0)
108 {
109 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
110 }
111 /* FIXME-soon. This is a bad hack but otherwise, we
a39116f1
RP
112 can't do c-style comments when '/' is a line
113 comment char. xoxorich. */
6efd877d
KR
114 if (lex['*'] == 0)
115 {
116 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
117 }
118} /* do_scrub_begin() */
fecd2382
RP
119
/* Stream read by scrub_from_file () and pushed back to by
   scrub_to_file ().  */
FILE *scrub_file;

/* "get" callback for file input: fetch the next character from
   scrub_file.  Returns whatever getc () returns, i.e. EOF at end of
   input.  */
int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}				/* scrub_from_file() */

/* "unget" callback for file input: push CH back onto scrub_file.  The
   result of ungetc () is not examined.  */
void
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */
fecd2382
RP
134
/* In-memory input for the string-based get/unget pair.  scrub_string is
   the read cursor; scrub_last_string is the position at which
   scrub_from_string () reports EOF.  */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for string input: return the character under the
   cursor and advance, or EOF once the cursor reaches
   scrub_last_string.

   The character is returned through unsigned char, matching getc ().
   Otherwise, where plain char is signed, byte 0xFF would be returned
   as -1 (indistinguishable from EOF) and any other byte >= 0x80 would
   be a negative value, which the scrubber then uses as a lex[] index.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* "unget" callback for string input: step the cursor back one position
   and store CH there.  Note that this writes into the input buffer.  */
void
scrub_to_string (ch)
     int ch;
{
  *--scrub_string = ch;
}				/* scrub_to_string() */
fecd2382
RP
150
/* Saved state of the scrubber */

/* Current and resume-to states of do_scrub_next_char (); the meaning of
   each state number is listed at the top of that function.  */
static int state, old_state;

/* Text still to be emitted one character at a time while in state -1.  */
static char *out_string;

/* Scratch buffer; receives the decimal text generated for a
   one-character quote.  */
static char out_buf[20];

/* Count of newlines consumed inside comments and strings that have yet
   to be handed back to the caller.  */
static int add_newlines;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
{
  /* Copies of the file-scope scrubber variables of the same names.  */
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof out_buf];
  int add_newlines;

  /* Copies of the input-source globals of the same names.  */
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
174
175char *
176app_push ()
177{
7c2d4011
SC
178 register struct app_save *saved;
179
6efd877d
KR
180 saved = (struct app_save *) xmalloc (sizeof (*saved));
181 saved->state = state;
182 saved->old_state = old_state;
183 saved->out_string = out_string;
184 bcopy (saved->out_buf, out_buf, sizeof (out_buf));
185 saved->add_newlines = add_newlines;
186 saved->scrub_string = scrub_string;
7c2d4011 187 saved->scrub_last_string = scrub_last_string;
6efd877d 188 saved->scrub_file = scrub_file;
7c2d4011
SC
189
190 /* do_scrub_begin() is not useful, just wastes time. */
6efd877d 191 return (char *) saved;
fecd2382
RP
192}
193
6efd877d
KR
194void
195app_pop (arg)
196 char *arg;
fecd2382 197{
6efd877d
KR
198 register struct app_save *saved = (struct app_save *) arg;
199
200 /* There is no do_scrub_end (). */
201 state = saved->state;
202 old_state = saved->old_state;
203 out_string = saved->out_string;
204 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
205 add_newlines = saved->add_newlines;
206 scrub_string = saved->scrub_string;
207 scrub_last_string = saved->scrub_last_string;
208 scrub_file = saved->scrub_file;
209
210 free (arg);
211} /* app_pop() */
212
/* Translate the character that follows a backslash into the character
   it denotes.

   CH is the character after the backslash.  Returns the control
   character for b, f, n, r and t; returns the quote itself for ' and ";
   any other character is returned unchanged.  */
int
process_escape (ch)
     char ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      /* \" denotes a double quote; this used to return a single quote.  */
      return '"';
    default:
      return ch;
    }
}
6efd877d
KR
237int
238do_scrub_next_char (get, unget)
239 int (*get) ();
240 void (*unget) ();
fecd2382 241{
6efd877d 242 /*State 0: beginning of normal line
a39116f1
RP
243 1: After first whitespace on line (flush more white)
244 2: After first non-white (opcode) on line (keep 1white)
245 3: after second white on line (into operands) (flush white)
246 4: after putting out a .line, put out digits
247 5: parsing a string, then go to old-state
248 6: putting out \ escape in a "d string.
249 7: After putting out a .app-file, put out string.
250 8: After putting out a .app-file string, flush until newline.
251 -1: output string in out_string and go to the state in old_state
252 -2: flush text until a '*' '/' is seen, then go to state old_state
253 */
6efd877d
KR
254
255 register int ch, ch2 = 0;
256
257 switch (state)
258 {
259 case -1:
260 ch = *out_string++;
261 if (*out_string == 0)
262 {
263 state = old_state;
264 old_state = 3;
265 }
266 return ch;
267
268 case -2:
269 for (;;)
270 {
271 do
272 {
273 ch = (*get) ();
274 }
275 while (ch != EOF && ch != '\n' && ch != '*');
276 if (ch == '\n' || ch == EOF)
277 return ch;
278
279 /* At this point, ch must be a '*' */
280 while ((ch = (*get) ()) == '*')
281 {
282 ;
283 }
284 if (ch == EOF || ch == '/')
285 break;
286 (*unget) (ch);
287 }
288 state = old_state;
289 return ' ';
290
291 case 4:
292 ch = (*get) ();
293 if (ch == EOF || (ch >= '0' && ch <= '9'))
294 return ch;
295 else
296 {
297 while (ch != EOF && IS_WHITESPACE (ch))
298 ch = (*get) ();
299 if (ch == '"')
300 {
301 (*unget) (ch);
302 out_string = "\n.app-file ";
303 old_state = 7;
304 state = -1;
305 return *out_string++;
306 }
307 else
308 {
309 while (ch != EOF && ch != '\n')
310 ch = (*get) ();
311 return ch;
312 }
313 }
314
315 case 5:
316 ch = (*get) ();
317 if (lex[ch] == LEX_IS_STRINGQUOTE)
318 {
319 state = old_state;
320 return ch;
321 }
322 else if (ch == '\\')
323 {
324 state = 6;
325 return ch;
326 }
327 else if (ch == EOF)
328 {
329 as_warn ("End of file in string: inserted '\"'");
330 state = old_state;
331 (*unget) ('\n');
332 return '"';
333 }
334 else
335 {
336 return ch;
337 }
338
339 case 6:
340 state = 5;
341 ch = (*get) ();
342 switch (ch)
343 {
344 /* This is neet. Turn "string
fecd2382 345 more string" into "string\n more string"
a39116f1 346 */
6efd877d
KR
347 case '\n':
348 (*unget) ('n');
349 add_newlines++;
350 return '\\';
351
352 case '"':
353 case '\\':
354 case 'b':
355 case 'f':
356 case 'n':
357 case 'r':
358 case 't':
fecd2382 359#ifdef BACKSLASH_V
6efd877d 360 case 'v':
fecd2382 361#endif /* BACKSLASH_V */
6efd877d
KR
362 case '0':
363 case '1':
364 case '2':
365 case '3':
366 case '4':
367 case '5':
368 case '6':
369 case '7':
370 break;
7c2d4011 371#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
6efd877d
KR
372 default:
373 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
374 break;
fecd2382 375#else /* ONLY_STANDARD_ESCAPES */
6efd877d
KR
376 default:
377 /* Accept \x as x for any x */
378 break;
fecd2382 379#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011 380
6efd877d
KR
381 case EOF:
382 as_warn ("End of file in string: '\"' inserted");
383 return '"';
384 }
385 return ch;
386
387 case 7:
388 ch = (*get) ();
389 state = 5;
390 old_state = 8;
391 return ch;
392
393 case 8:
394 do
395 ch = (*get) ();
396 while (ch != '\n');
397 state = 0;
398 return ch;
399 }
400
401 /* OK, we are somewhere in states 0 through 4 */
402
403 /* flushchar: */
404 ch = (*get) ();
405recycle:
406 if (ch == EOF)
407 {
408 if (state != 0)
409 as_warn ("End of file not at end of a line: Newline inserted.");
410 return ch;
411 }
412
413 switch (lex[ch])
414 {
415 case LEX_IS_WHITESPACE:
416 do
417 ch = (*get) ();
418 while (ch != EOF && IS_WHITESPACE (ch));
419 if (ch == EOF)
420 return ch;
421
422 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
423 {
424 goto recycle;
fecd2382 425 }
7c2d4011 426#ifdef MRI
6efd877d
KR
427 (*unget) (ch); /* Put back */
428 return ' '; /* Always return one space at start of line */
7c2d4011 429#endif
6efd877d
KR
430
431 /* If we're in state 2, we've seen a non-white
432 character followed by whitespace. If the next
433 character is ':', this is whitespace after a label
434 name which we can ignore. */
435 if (state == 2 && lex[ch] == LEX_IS_COLON)
436 {
437 state = 0;
438 return ch;
439 }
440
441 switch (state)
442 {
443 case 0:
444 state++;
445 goto recycle; /* Punted leading sp */
446 case 1:
447 BAD_CASE (state); /* We can't get here */
448 case 2:
449 state++;
450 (*unget) (ch);
451 return ' '; /* Sp after opco */
452 case 3:
453 goto recycle; /* Sp in operands */
454 default:
455 BAD_CASE (state);
456 }
457 break;
458
459 case LEX_IS_TWOCHAR_COMMENT_1ST:
460 ch2 = (*get) ();
461 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
462 {
463 for (;;)
464 {
465 do
466 {
467 ch2 = (*get) ();
468 if (ch2 != EOF && IS_NEWLINE (ch2))
469 add_newlines++;
fecd2382 470 }
6efd877d
KR
471 while (ch2 != EOF &&
472 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
473
474 while (ch2 != EOF &&
475 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
476 {
477 ch2 = (*get) ();
fecd2382 478 }
6efd877d
KR
479
480 if (ch2 == EOF
481 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382 482 break;
6efd877d
KR
483 (*unget) (ch);
484 }
485 if (ch2 == EOF)
486 as_warn ("End of file in multiline comment");
487
488 ch = ' ';
489 goto recycle;
490 }
491 else
492 {
493 if (ch2 != EOF)
494 (*unget) (ch2);
495 return ch;
496 }
497 break;
498
499 case LEX_IS_STRINGQUOTE:
500 old_state = state;
501 state = 5;
502 return ch;
503#ifndef MRI
a39116f1 504#ifndef IEEE_STYLE
6efd877d
KR
505 case LEX_IS_ONECHAR_QUOTE:
506 ch = (*get) ();
507 if (ch == EOF)
508 {
509 as_warn ("End-of-file after a one-character quote; \\000 inserted");
510 ch = 0;
511 }
512 if (ch == '\\')
513 {
514 ch = (*get) ();
515 ch = process_escape (ch);
516 }
517 sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011 518
6efd877d
KR
519
520 /* None of these 'x constants for us. We want 'x'.
fecd2382 521 */
6efd877d
KR
522 if ((ch = (*get) ()) != '\'')
523 {
fecd2382 524#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d 525 as_warn ("Missing close quote: (assumed)");
fecd2382 526#else
6efd877d 527 (*unget) (ch);
fecd2382 528#endif
6efd877d
KR
529 }
530 if (strlen (out_buf) == 1)
531 {
532 return out_buf[0];
533 }
534 old_state = state;
535 state = -1;
536 out_string = out_buf;
537 return *out_string++;
7c2d4011 538#endif
a39116f1 539#endif
6efd877d
KR
540 case LEX_IS_COLON:
541 if (state != 3)
542 state = 0;
543 return ch;
544
545 case LEX_IS_NEWLINE:
546 /* Roll out a bunch of newlines from inside comments, etc. */
547 if (add_newlines)
548 {
549 --add_newlines;
550 (*unget) (ch);
551 }
552 /* fall thru into... */
553
554 case LEX_IS_LINE_SEPARATOR:
555 state = 0;
556 return ch;
557
558 case LEX_IS_LINE_COMMENT_START:
559 if (state != 0) /* Not at start of line, act normal */
560 goto de_fault;
561
562 /* FIXME-someday: The two character comment stuff was badly
7c2d4011
SC
563 thought out. On i386, we want '/' as line comment start
564 AND we want C style comments. hence this hack. The
565 whole lexical process should be reworked. xoxorich. */
566
6efd877d
KR
567 if (ch == '/' && (ch2 = (*get) ()) == '*')
568 {
569 state = -2;
570 return (do_scrub_next_char (get, unget));
571 }
572 else
573 {
574 (*unget) (ch2);
575 } /* bad hack */
576
577 do
578 ch = (*get) ();
579 while (ch != EOF && IS_WHITESPACE (ch));
580 if (ch == EOF)
581 {
582 as_warn ("EOF in comment: Newline inserted");
583 return '\n';
584 }
585 if (ch < '0' || ch > '9')
586 {
587 /* Non-numerics: Eat whole comment line */
588 while (ch != EOF && !IS_NEWLINE (ch))
589 ch = (*get) ();
590 if (ch == EOF)
591 as_warn ("EOF in Comment: Newline inserted");
592 state = 0;
593 return '\n';
594 }
595 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
596 (*unget) (ch);
597 old_state = 4;
598 state = -1;
599 out_string = ".line ";
600 return *out_string++;
601
602 case LEX_IS_COMMENT_START:
603 do
604 ch = (*get) ();
605 while (ch != EOF && !IS_NEWLINE (ch));
606 if (ch == EOF)
607 as_warn ("EOF in comment: Newline inserted");
608 state = 0;
609 return '\n';
610
611 default:
612 de_fault:
613 /* Some relatively `normal' character. */
614 if (state == 0)
615 {
616 state = 2; /* Now seeing opcode */
617 return ch;
fecd2382 618 }
6efd877d
KR
619 else if (state == 1)
620 {
621 state = 2; /* Ditto */
622 return ch;
623 }
624 else
625 {
626 return ch; /* Opcode or operands already */
627 }
628 }
629 return -1;
fecd2382
RP
630}
631
#ifdef TEST

/* Stand-alone filter for exercising the scrubber: copies stdin to
   stdout through do_scrub_next_char ().  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also walks line_separator_chars, so the harness has
   to supply it.  NOTE(review): the type is assumed to match the
   declaration seen through the headers -- confirm.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* scrub_from_file ()/scrub_to_file () operate on scrub_file.  */
  scrub_file = stdin;
  do_scrub_begin ();
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal stand-in for the assembler's warning routine: prints STR
   followed by a newline on stderr.  STR is written verbatim; any format
   arguments the caller passes are ignored.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
654
655/*
656 * Local Variables:
657 * comment-column: 0
658 * fill-column: 131
659 * End:
660 */
661
662/* end of app.c */
This page took 0.087252 seconds and 4 git commands to generate.