i386v host/target/native separation
[deliverable/binutils-gdb.git] / gas / app.c
1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
23
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27 This needs better error-handling.
28 */
29
30 #include <stdio.h>
31 #include "as.h" /* For BAD_CASE() only */
32
33 #if (__STDC__ != 1) && !defined(const)
34 #define const /* Nothing */
35 #endif
36
/* Character classification table, indexed by character code (0..255).
   Entries are zero until do_scrub_begin() fills them in with one of the
   LEX_IS_* classes below. */
static char lex[256];

/* Characters that may appear in a symbol name. */
static char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character sets that are defined outside this file. */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];

/* Character classes stored in lex[]. */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Classification predicates.  The argument is parenthesised and reduced
   to unsigned char before it is used as a subscript, so that a plain
   (possibly signed) char holding a byte >= 0x80, or a stray EOF, can
   never index outside lex[]. */
#define IS_SYMBOL_COMPONENT(c)	(lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
63
64 /* FIXME-soon: The entire lexer/parser thingy should be
65 built statically at compile time rather than dynamically
66 each and every time the assembler is run. xoxorich. */
67
68 void do_scrub_begin() {
69 const char *p;
70
71 lex[' '] = LEX_IS_WHITESPACE;
72 lex['\t'] = LEX_IS_WHITESPACE;
73 lex['\n'] = LEX_IS_NEWLINE;
74 lex[';'] = LEX_IS_LINE_SEPARATOR;
75 lex['"'] = LEX_IS_STRINGQUOTE;
76 lex['\''] = LEX_IS_ONECHAR_QUOTE;
77 lex[':'] = LEX_IS_COLON;
78
79 #ifdef MRI
80 lex['\''] = LEX_IS_STRINGQUOTE;
81 #endif
82 /* Note that these override the previous defaults, e.g. if ';'
83 is a comment char, then it isn't a line separator. */
84 for (p = symbol_chars; *p; ++p) {
85 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
86 } /* declare symbol characters */
87
88 for (p = line_comment_chars; *p; p++) {
89 lex[*p] = LEX_IS_LINE_COMMENT_START;
90 } /* declare line comment chars */
91
92 for (p = comment_chars; *p; p++) {
93 lex[*p] = LEX_IS_COMMENT_START;
94 } /* declare comment chars */
95
96 for (p = line_separator_chars; *p; p++) {
97 lex[*p] = LEX_IS_LINE_SEPARATOR;
98 } /* declare line separators */
99
100 /* Only allow slash-star comments if slash is not in use */
101 if (lex['/'] == 0) {
102 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
103 }
104 /* FIXME-soon. This is a bad hack but otherwise, we
105 can't do c-style comments when '/' is a line
106 comment char. xoxorich. */
107 if (lex['*'] == 0) {
108 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
109 }
110 } /* do_scrub_begin() */
111
/* Stream used by scrub_from_file() and scrub_to_file(). */
FILE *scrub_file;

/* "get" callback for file input: hand back whatever getc() yields for
   scrub_file, including EOF. */
int scrub_from_file()
{
  int next_char;

  next_char = getc(scrub_file);
  return next_char;
}
117
118 void scrub_to_file(ch)
119 int ch;
120 {
121 ungetc(ch,scrub_file);
122 } /* scrub_to_file() */
123
/* In-memory input for the scrubber: scrub_string is the read position and
   scrub_last_string marks the end (one past the last character). */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for string input.  Returns the next character as an
   unsigned char value converted to int (the same convention getc() uses),
   or EOF once the read position reaches scrub_last_string.  Without the
   conversion a byte >= 0x80 would be sign-extended where char is signed:
   0xFF would be mistaken for EOF and the others would become negative
   lex[] subscripts in the caller. */
int scrub_from_string()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
} /* scrub_from_string() */

/* "unget" callback for string input: step the read position back by one
   and store CH there.  Like ungetc(), pushing back EOF does nothing, so
   end-of-input stays end-of-input and no byte of the buffer is clobbered. */
void scrub_to_string(ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
} /* scrub_to_string() */
136
/* Scrubber state carried from one do_scrub_next_char() call to the next. */
static int state, old_state;	/* current state, and the state to go back to */
static char *out_string;	/* canned text being handed out a character at a time */
static char out_buf[20];	/* holds the decimal text of a character constant */
static int add_newlines;	/* newlines consumed inside comments/strings, still owed */

/* Everything app needs to remember across an #include.  App runs
   asynchronously to the parsing of the .include's, so the state at the
   moment a .include is interpreted has nothing to do with that .include;
   hence the whole lot is saved rather than just part of it. */
struct app_save {
  int state, old_state;
  char *out_string;
  char out_buf[sizeof out_buf];
  int add_newlines;
  char *scrub_string, *scrub_last_string;
  FILE *scrub_file;
};
159
160 char *app_push() {
161 register struct app_save *saved;
162
163 saved = (struct app_save *) xmalloc(sizeof (*saved));
164 saved->state = state;
165 saved->old_state = old_state;
166 saved->out_string = out_string;
167 bcopy(saved->out_buf, out_buf, sizeof(out_buf));
168 saved->add_newlines = add_newlines;
169 saved->scrub_string = scrub_string;
170 saved->scrub_last_string = scrub_last_string;
171 saved->scrub_file = scrub_file;
172
173 /* do_scrub_begin() is not useful, just wastes time. */
174 return (char *)saved;
175 }
176
177 void app_pop(arg)
178 char *arg;
179 {
180 register struct app_save *saved = (struct app_save *)arg;
181
182 /* There is no do_scrub_end (). */
183 state = saved->state;
184 old_state = saved->old_state;
185 out_string = saved->out_string;
186 memcpy(saved->out_buf, out_buf, sizeof (out_buf));
187 add_newlines = saved->add_newlines;
188 scrub_string = saved->scrub_string;
189 scrub_last_string = saved->scrub_last_string;
190 scrub_file = saved->scrub_file;
191
192 free (arg);
193 } /* app_pop() */
194
/* Translate the character that follows a backslash into the character the
   escape sequence stands for.  Recognised letters map to the matching
   control character, a quote character maps to itself, and any other
   character is returned unchanged. */
int process_escape(ch)
     char ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      /* An escaped double quote is a double quote, not a single quote. */
      return '"';
    default:
      return ch;
    }
}
218 int do_scrub_next_char(get,unget)
219 int (*get)();
220 void (*unget)();
221 {
222 /*State 0: beginning of normal line
223 1: After first whitespace on line (flush more white)
224 2: After first non-white (opcode) on line (keep 1white)
225 3: after second white on line (into operands) (flush white)
226 4: after putting out a .line, put out digits
227 5: parsing a string, then go to old-state
228 6: putting out \ escape in a "d string.
229 7: After putting out a .app-file, put out string.
230 8: After putting out a .app-file string, flush until newline.
231 -1: output string in out_string and go to the state in old_state
232 -2: flush text until a '*' '/' is seen, then go to state old_state
233 */
234
235 register int ch, ch2 = 0;
236
237 switch (state) {
238 case -1:
239 ch= *out_string++;
240 if(*out_string==0) {
241 state=old_state;
242 old_state=3;
243 }
244 return ch;
245
246 case -2:
247 for(;;) {
248 do {
249 ch=(*get)();
250 } while(ch!=EOF && ch!='\n' && ch!='*');
251 if(ch=='\n' || ch==EOF)
252 return ch;
253
254 /* At this point, ch must be a '*' */
255 while ( (ch=(*get)()) == '*' ){
256 ;
257 }
258 if(ch==EOF || ch=='/')
259 break;
260 (*unget)(ch);
261 }
262 state=old_state;
263 return ' ';
264
265 case 4:
266 ch=(*get)();
267 if(ch==EOF || (ch>='0' && ch<='9'))
268 return ch;
269 else {
270 while(ch!=EOF && IS_WHITESPACE(ch))
271 ch=(*get)();
272 if(ch=='"') {
273 (*unget)(ch);
274 out_string="\n.app-file ";
275 old_state=7;
276 state= -1;
277 return *out_string++;
278 } else {
279 while(ch!=EOF && ch!='\n')
280 ch=(*get)();
281 return ch;
282 }
283 }
284
285 case 5:
286 ch=(*get)();
287 if(lex[ch]==LEX_IS_STRINGQUOTE) {
288 state=old_state;
289 return ch;
290 } else if(ch=='\\') {
291 state=6;
292 return ch;
293 } else if(ch==EOF) {
294 as_warn("End of file in string: inserted '\"'");
295 state=old_state;
296 (*unget)('\n');
297 return '"';
298 } else {
299 return ch;
300 }
301
302 case 6:
303 state=5;
304 ch=(*get)();
305 switch(ch) {
306 /* This is neet. Turn "string
307 more string" into "string\n more string"
308 */
309 case '\n':
310 (*unget)('n');
311 add_newlines++;
312 return '\\';
313
314 case '"':
315 case '\\':
316 case 'b':
317 case 'f':
318 case 'n':
319 case 'r':
320 case 't':
321 #ifdef BACKSLASH_V
322 case 'v':
323 #endif /* BACKSLASH_V */
324 case '0':
325 case '1':
326 case '2':
327 case '3':
328 case '4':
329 case '5':
330 case '6':
331 case '7':
332 break;
333 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
334 default:
335 as_warn("Unknown escape '\\%c' in string: Ignored",ch);
336 break;
337 #else /* ONLY_STANDARD_ESCAPES */
338 default:
339 /* Accept \x as x for any x */
340 break;
341 #endif /* ONLY_STANDARD_ESCAPES */
342
343 case EOF:
344 as_warn("End of file in string: '\"' inserted");
345 return '"';
346 }
347 return ch;
348
349 case 7:
350 ch=(*get)();
351 state=5;
352 old_state=8;
353 return ch;
354
355 case 8:
356 do ch= (*get)();
357 while(ch!='\n');
358 state=0;
359 return ch;
360 }
361
362 /* OK, we are somewhere in states 0 through 4 */
363
364 /* flushchar: */
365 ch=(*get)();
366 recycle:
367 if (ch == EOF) {
368 if (state != 0)
369 as_warn("End of file not at end of a line: Newline inserted.");
370 return ch;
371 }
372
373 switch (lex[ch]) {
374 case LEX_IS_WHITESPACE:
375 do ch=(*get)();
376 while(ch!=EOF && IS_WHITESPACE(ch));
377 if(ch==EOF)
378 return ch;
379
380 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
381 goto recycle;
382 }
383 #ifdef MRI
384 (*unget)(ch); /* Put back */
385 return ' '; /* Always return one space at start of line */
386 #endif
387 switch (state) {
388 case 0: state++; goto recycle; /* Punted leading sp */
389 case 1: BAD_CASE(state); /* We can't get here */
390 case 2: state++; (*unget)(ch); return ' '; /* Sp after opco */
391 case 3: goto recycle; /* Sp in operands */
392 default: BAD_CASE(state);
393 }
394 break;
395
396 case LEX_IS_TWOCHAR_COMMENT_1ST:
397 ch2=(*get)();
398 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
399 for(;;) {
400 do {
401 ch2=(*get)();
402 if(ch2 != EOF && IS_NEWLINE(ch2))
403 add_newlines++;
404 } while(ch2!=EOF &&
405 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
406
407 while (ch2!=EOF &&
408 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
409 ch2=(*get)();
410 }
411
412 if(ch2==EOF
413 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
414 break;
415 (*unget)(ch);
416 }
417 if(ch2==EOF)
418 as_warn("End of file in multiline comment");
419
420 ch = ' ';
421 goto recycle;
422 } else {
423 if(ch2!=EOF)
424 (*unget)(ch2);
425 return ch;
426 }
427 break;
428
429 case LEX_IS_STRINGQUOTE:
430 old_state=state;
431 state=5;
432 return ch;
433 #ifndef MRI
434 #ifndef IEEE_STYLE
435 case LEX_IS_ONECHAR_QUOTE:
436 ch=(*get)();
437 if(ch==EOF) {
438 as_warn("End-of-file after a one-character quote; \000 inserted");
439 ch=0;
440 }
441 if (ch == '\\') {
442 ch = (*get)();
443 ch = process_escape(ch);
444 }
445 sprintf(out_buf,"%d", (int)(unsigned char)ch);
446
447 /* None of these 'x constants for us. We want 'x'.
448 */
449 if ( (ch=(*get)()) != '\'' ) {
450 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
451 as_warn("Missing close quote: (assumed)");
452 #else
453 (*unget)(ch);
454 #endif
455 }
456
457 old_state=state;
458 state= -1;
459 out_string=out_buf;
460 return *out_string++;
461 #endif
462 #endif
463 case LEX_IS_COLON:
464 if(state!=3)
465 state=0;
466 return ch;
467
468 case LEX_IS_NEWLINE:
469 /* Roll out a bunch of newlines from inside comments, etc. */
470 if(add_newlines) {
471 --add_newlines;
472 (*unget)(ch);
473 }
474 /* fall thru into... */
475
476 case LEX_IS_LINE_SEPARATOR:
477 state=0;
478 return ch;
479
480 case LEX_IS_LINE_COMMENT_START:
481 if (state != 0) /* Not at start of line, act normal */
482 goto de_fault;
483
484 /* FIXME-someday: The two character comment stuff was badly
485 thought out. On i386, we want '/' as line comment start
486 AND we want C style comments. hence this hack. The
487 whole lexical process should be reworked. xoxorich. */
488
489 if (ch == '/' && (ch2 = (*get)()) == '*') {
490 state = -2;
491 return(do_scrub_next_char(get, unget));
492 } else {
493 (*unget)(ch2);
494 } /* bad hack */
495
496 do ch=(*get)();
497 while(ch!=EOF && IS_WHITESPACE(ch));
498 if(ch==EOF) {
499 as_warn("EOF in comment: Newline inserted");
500 return '\n';
501 }
502 if(ch<'0' || ch>'9') {
503 /* Non-numerics: Eat whole comment line */
504 while(ch!=EOF && !IS_NEWLINE(ch))
505 ch=(*get)();
506 if(ch==EOF)
507 as_warn("EOF in Comment: Newline inserted");
508 state=0;
509 return '\n';
510 }
511 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
512 (*unget)(ch);
513 old_state=4;
514 state= -1;
515 out_string=".line ";
516 return *out_string++;
517
518 case LEX_IS_COMMENT_START:
519 do ch=(*get)();
520 while(ch!=EOF && !IS_NEWLINE(ch));
521 if(ch==EOF)
522 as_warn("EOF in comment: Newline inserted");
523 state=0;
524 return '\n';
525
526 default:
527 de_fault:
528 /* Some relatively `normal' character. */
529 if(state==0) {
530 state=2; /* Now seeing opcode */
531 return ch;
532 } else if(state==1) {
533 state=2; /* Ditto */
534 return ch;
535 } else {
536 return ch; /* Opcode or operands already */
537 }
538 }
539 return -1;
540 }
541
#ifdef TEST

/* Stand-alone harness: scrub standard input onto standard output.
   The three character sets are normally defined outside this file; they
   are declared `extern const' near the top, so the definitions here carry
   the same qualifier. */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
  int ch;

  /* Build the classification table, then read through the file-based
     get/unget callbacks with stdin as the input stream. */
  do_scrub_begin();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char(scrub_from_file, scrub_to_file)) != EOF)
    putc(ch, stdout);
  return 0;
}

/* Minimal stand-in for the assembler's warning routine: print STR and a
   newline on stderr.
   NOTE(review): the return type is int, as the original implicit-int
   definition was; check it against the declaration in as.h. */
int as_warn(str)
     char *str;
{
  fputs(str, stderr);
  putc('\n', stderr);
  return 0;
}
#endif
563
564 /*
565 * Local Variables:
566 * comment-column: 0
567 * fill-column: 131
568 * End:
569 */
570
571 /* end of app.c */
This page took 0.046429 seconds and 4 git commands to generate.