* nm-hp300bsd.h (PTRACE_ARG3_TYPE): FSF's hp300's have int* not caddr_t.
[deliverable/binutils-gdb.git] / gas / app.c
1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
23
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
27 pair. This needs better error-handling.
28 */
29
30 #include <stdio.h>
31 #include "as.h" /* For BAD_CASE() only */
32 #include "read.h"
33
34 #if (__STDC__ != 1) && !defined(const)
35 #define const /* Nothing */
36 #endif
37
/* Character classification table, indexed by character code (0..255).
   It is filled in at run time by do_scrub_begin (); an entry of 0 means
   the character has no special meaning to the scrubber.  */
static char lex[256];

/* Characters that may appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_TWOCHAR_COMMENT_2ND      7
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11

/* Classification predicates.  The argument is parenthesized and
   converted to unsigned char before it is used as a subscript, so a
   plain (possibly signed) char holding a byte >= 0x80 can never index
   in front of the table.  Callers must still rule out EOF themselves
   before classifying a character.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
59
60 /* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
63
64 void
65 do_scrub_begin ()
66 {
67 const char *p;
68
69 lex[' '] = LEX_IS_WHITESPACE;
70 lex['\t'] = LEX_IS_WHITESPACE;
71 lex['\n'] = LEX_IS_NEWLINE;
72 lex[';'] = LEX_IS_LINE_SEPARATOR;
73 lex['"'] = LEX_IS_STRINGQUOTE;
74 lex['\''] = LEX_IS_ONECHAR_QUOTE;
75 lex[':'] = LEX_IS_COLON;
76
77
78
79 #ifdef SINGLE_QUOTE_STRINGS
80 lex['\''] = LEX_IS_STRINGQUOTE;
81 #endif
82
83 /* Note that these override the previous defaults, e.g. if ';'
84
85 is a comment char, then it isn't a line separator. */
86 for (p = symbol_chars; *p; ++p)
87 {
88 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
89 } /* declare symbol characters */
90
91 for (p = line_comment_chars; *p; p++)
92 {
93 lex[*p] = LEX_IS_LINE_COMMENT_START;
94 } /* declare line comment chars */
95
96 for (p = comment_chars; *p; p++)
97 {
98 lex[*p] = LEX_IS_COMMENT_START;
99 } /* declare comment chars */
100
101 for (p = line_separator_chars; *p; p++)
102 {
103 lex[*p] = LEX_IS_LINE_SEPARATOR;
104 } /* declare line separators */
105
106 /* Only allow slash-star comments if slash is not in use */
107 if (lex['/'] == 0)
108 {
109 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
110 }
111 /* FIXME-soon. This is a bad hack but otherwise, we
112 can't do c-style comments when '/' is a line
113 comment char. xoxorich. */
114 if (lex['*'] == 0)
115 {
116 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
117 }
118 } /* do_scrub_begin() */
119
/* Stream used by the file-backed get/unget callbacks below.  */
FILE *scrub_file;

/* `get' callback for file input: fetch the next character from
   scrub_file.  Returns whatever getc () returns, including EOF.  */
int
scrub_from_file ()
{
  int next_char;

  next_char = getc (scrub_file);
  return next_char;
}                               /* scrub_from_file() */

/* `unget' callback for file input: push CH back onto scrub_file so
   that the next scrub_from_file () call returns it again.  */
void
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}                               /* scrub_to_file() */
134
/* Cursor into, and one-past-the-end of, the in-memory text used by the
   string-backed get/unget callbacks below.  */
char *scrub_string;
char *scrub_last_string;

/* `get' callback for in-memory input: return the next character of the
   buffer, or EOF once scrub_string has reached scrub_last_string.

   The byte is returned as an unsigned char value, exactly as getc ()
   would return it.  Returning a plain (possibly signed) char would turn
   byte 0xFF into -1, which callers cannot tell from EOF, and would turn
   other high bytes into negative table subscripts.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}                               /* scrub_from_string() */

/* `unget' callback for in-memory input: step the cursor back one
   position and store CH there.

   EOF is ignored, mirroring ungetc ().  Without this check, pushing
   back EOF would write a bogus byte into the buffer and make it
   readable as if it were input.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}                               /* scrub_to_string() */
150
/* Scrubber state that persists from one do_scrub_next_char () call to
   the next.  */

/* Current state of the scrubbing state machine.  */
static int state;

/* State to return to once the current special state has finished.  */
static int old_state;

/* Text still to be emitted, one character per call, while in state -1.  */
static char *out_string;

/* Scratch buffer for the decimal text of a single-quoted character
   constant.  */
static char out_buf[20];

/* Number of newlines that were swallowed (inside comments or continued
   strings) and are replayed when the next newline is seen.  */
static int add_newlines = 0;

/* Snapshot of everything above plus the current input source.

   The scrubber runs asynchronously to the parsing of .include
   directives, so its state at the moment a .include is interpreted
   says nothing about where the scrubber really is.  The whole lot
   therefore has to be saved and restored around an include.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
174
175 char *
176 app_push ()
177 {
178 register struct app_save *saved;
179
180 saved = (struct app_save *) xmalloc (sizeof (*saved));
181 saved->state = state;
182 saved->old_state = old_state;
183 saved->out_string = out_string;
184 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
185 saved->add_newlines = add_newlines;
186 saved->scrub_string = scrub_string;
187 saved->scrub_last_string = scrub_last_string;
188 saved->scrub_file = scrub_file;
189
190 /* do_scrub_begin() is not useful, just wastes time. */
191 return (char *) saved;
192 }
193
194 void
195 app_pop (arg)
196 char *arg;
197 {
198 register struct app_save *saved = (struct app_save *) arg;
199
200 /* There is no do_scrub_end (). */
201 state = saved->state;
202 old_state = saved->old_state;
203 out_string = saved->out_string;
204 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
205 add_newlines = saved->add_newlines;
206 scrub_string = saved->scrub_string;
207 scrub_last_string = saved->scrub_last_string;
208 scrub_file = saved->scrub_file;
209
210 free (arg);
211 } /* app_pop() */
212
/* Map CH, the character that followed a backslash, to the character the
   escape sequence stands for.  Only b, f, n, r and t are translated;
   the quote characters and anything else are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int
process_escape (ch)
     char ch;
{
  /* Pairs of (escape letter, resulting character).  */
  static const char escape_map[][2] =
  {
    {'b', '\b'},
    {'f', '\f'},
    {'n', '\n'},
    {'r', '\r'},
    {'t', '\t'}
  };
  unsigned int i;

  for (i = 0; i < sizeof (escape_map) / sizeof (escape_map[0]); i++)
    {
      if (escape_map[i][0] == ch)
        return escape_map[i][1];
    }

  /* '\'', '"' and every unrecognized character stand for themselves.  */
  return ch;
}
239 int
240 do_scrub_next_char (get, unget)
241 int (*get) ();
242 void (*unget) ();
243 {
244 /*State 0: beginning of normal line
245 1: After first whitespace on line (flush more white)
246 2: After first non-white (opcode) on line (keep 1white)
247 3: after second white on line (into operands) (flush white)
248 4: after putting out a .line, put out digits
249 5: parsing a string, then go to old-state
250 6: putting out \ escape in a "d string.
251 7: After putting out a .app-file, put out string.
252 8: After putting out a .app-file string, flush until newline.
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
255 */
256
257 register int ch, ch2 = 0;
258
259 switch (state)
260 {
261 case -1:
262 ch = *out_string++;
263 if (*out_string == 0)
264 {
265 state = old_state;
266 old_state = 3;
267 }
268 return ch;
269
270 case -2:
271 for (;;)
272 {
273 do
274 {
275 ch = (*get) ();
276 }
277 while (ch != EOF && ch != '\n' && ch != '*');
278 if (ch == '\n' || ch == EOF)
279 return ch;
280
281 /* At this point, ch must be a '*' */
282 while ((ch = (*get) ()) == '*')
283 {
284 ;
285 }
286 if (ch == EOF || ch == '/')
287 break;
288 (*unget) (ch);
289 }
290 state = old_state;
291 return ' ';
292
293 case 4:
294 ch = (*get) ();
295 if (ch == EOF || (ch >= '0' && ch <= '9'))
296 return ch;
297 else
298 {
299 while (ch != EOF && IS_WHITESPACE (ch))
300 ch = (*get) ();
301 if (ch == '"')
302 {
303 (*unget) (ch);
304 out_string = "\n.app-file ";
305 old_state = 7;
306 state = -1;
307 return *out_string++;
308 }
309 else
310 {
311 while (ch != EOF && ch != '\n')
312 ch = (*get) ();
313 return ch;
314 }
315 }
316
317 case 5:
318 ch = (*get) ();
319 if (lex[ch] == LEX_IS_STRINGQUOTE)
320 {
321 state = old_state;
322 return ch;
323 }
324 else if (ch == '\\')
325 {
326 state = 6;
327 return ch;
328 }
329 else if (ch == EOF)
330 {
331 as_warn ("End of file in string: inserted '\"'");
332 state = old_state;
333 (*unget) ('\n');
334 return '"';
335 }
336 else
337 {
338 return ch;
339 }
340
341 case 6:
342 state = 5;
343 ch = (*get) ();
344 switch (ch)
345 {
346 /* Handle strings broken across lines, by turning '\n' into
347 '\\' and 'n'. */
348 case '\n':
349 (*unget) ('n');
350 add_newlines++;
351 return '\\';
352
353 case '"':
354 case '\\':
355 case 'b':
356 case 'f':
357 case 'n':
358 case 'r':
359 case 't':
360 #ifdef BACKSLASH_V
361 case 'v':
362 #endif /* BACKSLASH_V */
363 case '0':
364 case '1':
365 case '2':
366 case '3':
367 case '4':
368 case '5':
369 case '6':
370 case '7':
371 break;
372 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
373 default:
374 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
375 break;
376 #else /* ONLY_STANDARD_ESCAPES */
377 default:
378 /* Accept \x as x for any x */
379 break;
380 #endif /* ONLY_STANDARD_ESCAPES */
381
382 case EOF:
383 as_warn ("End of file in string: '\"' inserted");
384 return '"';
385 }
386 return ch;
387
388 case 7:
389 ch = (*get) ();
390 state = 5;
391 old_state = 8;
392 return ch;
393
394 case 8:
395 do
396 ch = (*get) ();
397 while (ch != '\n');
398 state = 0;
399 return ch;
400 }
401
402 /* OK, we are somewhere in states 0 through 4 */
403
404 /* flushchar: */
405 ch = (*get) ();
406 recycle:
407 if (ch == EOF)
408 {
409 if (state != 0)
410 as_warn ("End of file not at end of a line: Newline inserted.");
411 return ch;
412 }
413
414 switch (lex[ch])
415 {
416 case LEX_IS_WHITESPACE:
417 do
418 ch = (*get) ();
419 while (ch != EOF && IS_WHITESPACE (ch));
420 if (ch == EOF)
421 return ch;
422
423 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
424 {
425 goto recycle;
426 }
427 #ifdef MRI
428 (*unget) (ch); /* Put back */
429 return ' '; /* Always return one space at start of line */
430 #endif
431
432 /* If we're in state 2, we've seen a non-white
433 character followed by whitespace. If the next
434 character is ':', this is whitespace after a label
435 name which we can ignore. */
436 if (state == 2 && lex[ch] == LEX_IS_COLON)
437 {
438 state = 0;
439 return ch;
440 }
441
442 switch (state)
443 {
444 case 0:
445 state++;
446 goto recycle; /* Punted leading sp */
447 case 1:
448 BAD_CASE (state); /* We can't get here */
449 case 2:
450 state++;
451 (*unget) (ch);
452 return ' '; /* Sp after opco */
453 case 3:
454 goto recycle; /* Sp in operands */
455 default:
456 BAD_CASE (state);
457 }
458 break;
459
460 case LEX_IS_TWOCHAR_COMMENT_1ST:
461 ch2 = (*get) ();
462 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
463 {
464 for (;;)
465 {
466 do
467 {
468 ch2 = (*get) ();
469 if (ch2 != EOF && IS_NEWLINE (ch2))
470 add_newlines++;
471 }
472 while (ch2 != EOF &&
473 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
474
475 while (ch2 != EOF &&
476 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
477 {
478 ch2 = (*get) ();
479 }
480
481 if (ch2 == EOF
482 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
483 break;
484 (*unget) (ch);
485 }
486 if (ch2 == EOF)
487 as_warn ("End of file in multiline comment");
488
489 ch = ' ';
490 goto recycle;
491 }
492 else
493 {
494 if (ch2 != EOF)
495 (*unget) (ch2);
496 return ch;
497 }
498 break;
499
500 case LEX_IS_STRINGQUOTE:
501 old_state = state;
502 state = 5;
503 return ch;
504 #ifndef MRI
505 #ifndef IEEE_STYLE
506 case LEX_IS_ONECHAR_QUOTE:
507 ch = (*get) ();
508 if (ch == EOF)
509 {
510 as_warn ("End-of-file after a one-character quote; \\000 inserted");
511 ch = 0;
512 }
513 if (ch == '\\')
514 {
515 ch = (*get) ();
516 ch = process_escape (ch);
517 }
518 sprintf (out_buf, "%d", (int) (unsigned char) ch);
519
520
521 /* None of these 'x constants for us. We want 'x'.
522 */
523 if ((ch = (*get) ()) != '\'')
524 {
525 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
526 as_warn ("Missing close quote: (assumed)");
527 #else
528 (*unget) (ch);
529 #endif
530 }
531 if (strlen (out_buf) == 1)
532 {
533 return out_buf[0];
534 }
535 old_state = state;
536 state = -1;
537 out_string = out_buf;
538 return *out_string++;
539 #endif
540 #endif
541 case LEX_IS_COLON:
542 if (state != 3)
543 state = 0;
544 return ch;
545
546 case LEX_IS_NEWLINE:
547 /* Roll out a bunch of newlines from inside comments, etc. */
548 if (add_newlines)
549 {
550 --add_newlines;
551 (*unget) (ch);
552 }
553 /* fall thru into... */
554
555 case LEX_IS_LINE_SEPARATOR:
556 state = 0;
557 return ch;
558
559 case LEX_IS_LINE_COMMENT_START:
560 if (state != 0) /* Not at start of line, act normal */
561 goto de_fault;
562
563 /* FIXME-someday: The two character comment stuff was badly
564 thought out. On i386, we want '/' as line comment start
565 AND we want C style comments. hence this hack. The
566 whole lexical process should be reworked. xoxorich. */
567
568 if (ch == '/' && (ch2 = (*get) ()) == '*')
569 {
570 state = -2;
571 return (do_scrub_next_char (get, unget));
572 }
573 else
574 {
575 (*unget) (ch2);
576 } /* bad hack */
577
578 do
579 ch = (*get) ();
580 while (ch != EOF && IS_WHITESPACE (ch));
581 if (ch == EOF)
582 {
583 as_warn ("EOF in comment: Newline inserted");
584 return '\n';
585 }
586 if (ch < '0' || ch > '9')
587 {
588 /* Non-numerics: Eat whole comment line */
589 while (ch != EOF && !IS_NEWLINE (ch))
590 ch = (*get) ();
591 if (ch == EOF)
592 as_warn ("EOF in Comment: Newline inserted");
593 state = 0;
594 return '\n';
595 }
596 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
597 (*unget) (ch);
598 old_state = 4;
599 state = -1;
600 out_string = ".line ";
601 return *out_string++;
602
603 case LEX_IS_COMMENT_START:
604 do
605 ch = (*get) ();
606 while (ch != EOF && !IS_NEWLINE (ch));
607 if (ch == EOF)
608 as_warn ("EOF in comment: Newline inserted");
609 state = 0;
610 return '\n';
611
612 default:
613 de_fault:
614 /* Some relatively `normal' character. */
615 if (state == 0)
616 {
617 state = 2; /* Now seeing opcode */
618 return ch;
619 }
620 else if (state == 1)
621 {
622 state = 2; /* Ditto */
623 return ch;
624 }
625 else
626 {
627 return ch; /* Opcode or operands already */
628 }
629 }
630 return -1;
631 }
632
#ifdef TEST

/* Stand-alone test harness: scrub standard input to standard output.
   The tables below supply the characters do_scrub_begin () reads; in
   this harness '|' starts a comment anywhere, '#' starts a comment at
   the start of a line, and there are no extra line separators.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Build the classification table and read from stdin through the
     file-backed get/unget callbacks.  */
  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal stand-in for the assembler's warning routine: print STR and
   a newline on stderr.  Extra arguments passed by callers are ignored,
   so a format directive in STR is printed literally.  */
int
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
  return 0;
}

#endif
655
656 /*
657 * Local Variables:
658 * comment-column: 0
659 * fill-column: 131
660 * End:
661 */
662
663 /* end of app.c */
This page took 0.04858 seconds and 4 git commands to generate.