Commit | Line | Data |
---|---|---|
3340f7e5 | 1 | /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc. |
6efd877d | 2 | |
a39116f1 RP |
3 | Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 |
4 | */ | |
fecd2382 RP |
5 | /* This is the Assembler Pre-Processor |
6 | Copyright (C) 1987 Free Software Foundation, Inc. | |
6efd877d | 7 | |
a39116f1 | 8 | This file is part of GAS, the GNU Assembler. |
6efd877d | 9 | |
a39116f1 RP |
10 | GAS is free software; you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 2, or (at your option) | |
13 | any later version. | |
6efd877d | 14 | |
a39116f1 RP |
15 | GAS is distributed in the hope that it will be useful, |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | GNU General Public License for more details. | |
6efd877d | 19 | |
a39116f1 RP |
20 | You should have received a copy of the GNU General Public License |
21 | along with GAS; see the file COPYING. If not, write to | |
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
fecd2382 RP |
23 | |
24 | /* App, the assembler pre-processor. This pre-processor strips out excess | |
25 | spaces, turns single-quoted characters into a decimal constant, and turns | |
9a7d824a | 26 | # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename> |
be06bdcd | 27 | pair. This needs better error-handling. |
a39116f1 | 28 | */ |
fecd2382 RP |
29 | |
30 | #include <stdio.h> | |
6efd877d | 31 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 32 | |
3340f7e5 | 33 | #if (__STDC__ != 1) && !defined(const) |
6efd877d | 34 | #define const /* Nothing */ |
fecd2382 RP |
35 | #endif |
36 | ||
6efd877d | 37 | static char lex[256]; |
6d331d71 | 38 | static const char symbol_chars[] = |
6efd877d | 39 | "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; |
fecd2382 RP |
40 | |
41 | #define LEX_IS_SYMBOL_COMPONENT 1 | |
42 | #define LEX_IS_WHITESPACE 2 | |
43 | #define LEX_IS_LINE_SEPARATOR 3 | |
44 | #define LEX_IS_COMMENT_START 4 | |
45 | #define LEX_IS_LINE_COMMENT_START 5 | |
46 | #define LEX_IS_TWOCHAR_COMMENT_1ST 6 | |
47 | #define LEX_IS_TWOCHAR_COMMENT_2ND 7 | |
48 | #define LEX_IS_STRINGQUOTE 8 | |
49 | #define LEX_IS_COLON 9 | |
50 | #define LEX_IS_NEWLINE 10 | |
51 | #define LEX_IS_ONECHAR_QUOTE 11 | |
a39116f1 RP |
/* Character-class predicates over the lex[] table.  The argument is
   parenthesized and converted to unsigned char so that a plain `char'
   holding a byte >= 0x80 can never become a negative array index.
   Callers test for EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c) (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)       (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)   (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)          (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)     (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)          (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
58 | ||
59 | /* FIXME-soon: The entire lexer/parser thingy should be | |
60 | built statically at compile time rather than dynamically | |
61 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 62 | |
6efd877d KR |
63 | void |
64 | do_scrub_begin () | |
65 | { | |
66 | const char *p; | |
67 | ||
68 | lex[' '] = LEX_IS_WHITESPACE; | |
69 | lex['\t'] = LEX_IS_WHITESPACE; | |
70 | lex['\n'] = LEX_IS_NEWLINE; | |
71 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
72 | lex['"'] = LEX_IS_STRINGQUOTE; | |
73 | lex['\''] = LEX_IS_ONECHAR_QUOTE; | |
74 | lex[':'] = LEX_IS_COLON; | |
7c2d4011 | 75 | |
be06bdcd SC |
76 | |
77 | ||
78 | #ifdef SINGLE_QUOTE_STRINGS | |
79 | lex['\''] = LEX_IS_STRINGQUOTE; | |
7c2d4011 | 80 | #endif |
be06bdcd | 81 | |
6efd877d | 82 | /* Note that these override the previous defaults, e.g. if ';' |
be06bdcd | 83 | |
fecd2382 | 84 | is a comment char, then it isn't a line separator. */ |
6efd877d KR |
85 | for (p = symbol_chars; *p; ++p) |
86 | { | |
87 | lex[*p] = LEX_IS_SYMBOL_COMPONENT; | |
88 | } /* declare symbol characters */ | |
89 | ||
6efd877d KR |
90 | for (p = comment_chars; *p; p++) |
91 | { | |
92 | lex[*p] = LEX_IS_COMMENT_START; | |
93 | } /* declare comment chars */ | |
94 | ||
9a7d824a ILT |
95 | for (p = line_comment_chars; *p; p++) |
96 | { | |
97 | lex[*p] = LEX_IS_LINE_COMMENT_START; | |
98 | } /* declare line comment chars */ | |
99 | ||
6efd877d KR |
100 | for (p = line_separator_chars; *p; p++) |
101 | { | |
102 | lex[*p] = LEX_IS_LINE_SEPARATOR; | |
103 | } /* declare line separators */ | |
104 | ||
105 | /* Only allow slash-star comments if slash is not in use */ | |
106 | if (lex['/'] == 0) | |
107 | { | |
108 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
109 | } | |
110 | /* FIXME-soon. This is a bad hack but otherwise, we | |
a39116f1 RP |
111 | can't do c-style comments when '/' is a line |
112 | comment char. xoxorich. */ | |
6efd877d KR |
113 | if (lex['*'] == 0) |
114 | { | |
115 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
116 | } | |
117 | } /* do_scrub_begin() */ | |
fecd2382 RP |
118 | |
119 | FILE *scrub_file; | |
120 | ||
6efd877d KR |
121 | int |
122 | scrub_from_file () | |
123 | { | |
124 | return getc (scrub_file); | |
fecd2382 RP |
125 | } |
126 | ||
6efd877d KR |
127 | void |
128 | scrub_to_file (ch) | |
129 | int ch; | |
fecd2382 | 130 | { |
6efd877d KR |
131 | ungetc (ch, scrub_file); |
132 | } /* scrub_to_file() */ | |
fecd2382 RP |
133 | |
/* In-memory input for the scrubber: scrub_string is the next unread
   character, scrub_last_string marks the end of the text (reading
   stops when the two are equal).  */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for do_scrub_next_char when the input is a string.
   Returns the next character as a non-negative value, or EOF once
   scrub_string has reached scrub_last_string.

   The character is converted through unsigned char, as getc does, so
   that a byte such as 0xFF is not mistaken for EOF on hosts where
   plain `char' is signed, and so that callers can use the result as
   an array index.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}                               /* scrub_from_string() */
fecd2382 | 142 | |
6efd877d KR |
143 | void |
144 | scrub_to_string (ch) | |
145 | int ch; | |
fecd2382 | 146 | { |
6efd877d KR |
147 | *--scrub_string = ch; |
148 | } /* scrub_to_string() */ | |
fecd2382 RP |
149 | |
150 | /* Saved state of the scrubber */ | |
151 | static int state; | |
152 | static int old_state; | |
153 | static char *out_string; | |
154 | static char out_buf[20]; | |
155 | static int add_newlines = 0; | |
156 | ||
157 | /* Data structure for saving the state of app across #include's. Note that | |
158 | app is called asynchronously to the parsing of the .include's, so our | |
159 | state at the time .include is interpreted is completely unrelated. | |
160 | That's why we have to save it all. */ | |
161 | ||
6efd877d KR |
162 | struct app_save /* One saved copy of all scrubber state; filled by app_push, consumed by app_pop. */ |
163 | { | |
164 | int state; /* Copy of the file-scope `state'. */ | |
165 | int old_state; /* Copy of the file-scope `old_state'. */ | |
166 | char *out_string; /* Copy of the pending-output pointer `out_string'. */ | |
167 | char out_buf[sizeof (out_buf)]; /* Same size as the file-scope `out_buf'. */ | |
168 | int add_newlines; /* Copy of the `add_newlines' counter. */ | |
169 | char *scrub_string; /* String-input read position. */ | |
170 | char *scrub_last_string; /* String-input end marker. */ | |
171 | FILE *scrub_file; /* File-input stream. */ | |
172 | }; | |
173 | ||
174 | char * | |
175 | app_push () | |
176 | { | |
7c2d4011 SC |
177 | register struct app_save *saved; |
178 | ||
6efd877d KR |
179 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
180 | saved->state = state; | |
181 | saved->old_state = old_state; | |
182 | saved->out_string = out_string; | |
6d331d71 | 183 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d KR |
184 | saved->add_newlines = add_newlines; |
185 | saved->scrub_string = scrub_string; | |
7c2d4011 | 186 | saved->scrub_last_string = scrub_last_string; |
6efd877d | 187 | saved->scrub_file = scrub_file; |
7c2d4011 SC |
188 | |
189 | /* do_scrub_begin() is not useful, just wastes time. */ | |
6efd877d | 190 | return (char *) saved; |
fecd2382 RP |
191 | } |
192 | ||
6efd877d KR |
193 | void |
194 | app_pop (arg) | |
195 | char *arg; | |
fecd2382 | 196 | { |
6efd877d KR |
197 | register struct app_save *saved = (struct app_save *) arg; |
198 | ||
199 | /* There is no do_scrub_end (). */ | |
200 | state = saved->state; | |
201 | old_state = saved->old_state; | |
202 | out_string = saved->out_string; | |
203 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); | |
204 | add_newlines = saved->add_newlines; | |
205 | scrub_string = saved->scrub_string; | |
206 | scrub_last_string = saved->scrub_last_string; | |
207 | scrub_file = saved->scrub_file; | |
208 | ||
209 | free (arg); | |
210 | } /* app_pop() */ | |
211 | ||
6d331d71 KR |
212 | /* @@ This assumes that \n &c are the same on host and target. This is not |
213 | necessarily true. */ | |
6efd877d KR |
/* Translate the character that follows a backslash into the character
   the escape stands for.  CH is the letter after the backslash.  The
   letters b, f, n, r and t map to the corresponding control
   characters; every other character (including the quote characters)
   is returned unchanged.  */
int
process_escape (ch)
     char ch;
{
  /* escape_letters[i] is translated to escape_values[i].  */
  static const char escape_letters[] = "bfnrt";
  static const char escape_values[] = "\b\f\n\r\t";
  int i;

  for (i = 0; escape_letters[i] != '\0'; i++)
    {
      if (ch == escape_letters[i])
        return escape_values[i];
    }

  return ch;
}
6efd877d KR |
238 | int |
239 | do_scrub_next_char (get, unget) | |
240 | int (*get) (); | |
241 | void (*unget) (); | |
fecd2382 | 242 | { |
6efd877d | 243 | /*State 0: beginning of normal line |
a39116f1 RP |
244 | 1: After first whitespace on line (flush more white) |
245 | 2: After first non-white (opcode) on line (keep 1white) | |
246 | 3: after second white on line (into operands) (flush white) | |
247 | 4: after putting out a .line, put out digits | |
248 | 5: parsing a string, then go to old-state | |
249 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
250 | 7: After putting out a .appfile, put out string. |
251 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 252 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 253 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a39116f1 RP |
254 | -1: output string in out_string and go to the state in old_state |
255 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
256 | */ | |
6efd877d | 257 | |
9a7d824a ILT |
258 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
259 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
260 | 120''. States 9 and 10 ensure that a space is never dropped in | |
261 | between characters which could appear in a identifier. Ian | |
262 | Taylor, ian@cygnus.com. */ | |
f6a91cc0 | 263 | |
6efd877d KR |
264 | register int ch, ch2 = 0; |
265 | ||
266 | switch (state) | |
267 | { | |
268 | case -1: | |
269 | ch = *out_string++; | |
270 | if (*out_string == 0) | |
271 | { | |
272 | state = old_state; | |
273 | old_state = 3; | |
274 | } | |
275 | return ch; | |
276 | ||
277 | case -2: | |
278 | for (;;) | |
279 | { | |
280 | do | |
281 | { | |
282 | ch = (*get) (); | |
283 | } | |
284 | while (ch != EOF && ch != '\n' && ch != '*'); | |
285 | if (ch == '\n' || ch == EOF) | |
286 | return ch; | |
287 | ||
288 | /* At this point, ch must be a '*' */ | |
289 | while ((ch = (*get) ()) == '*') | |
290 | { | |
291 | ; | |
292 | } | |
293 | if (ch == EOF || ch == '/') | |
294 | break; | |
295 | (*unget) (ch); | |
296 | } | |
297 | state = old_state; | |
298 | return ' '; | |
299 | ||
300 | case 4: | |
301 | ch = (*get) (); | |
302 | if (ch == EOF || (ch >= '0' && ch <= '9')) | |
303 | return ch; | |
304 | else | |
305 | { | |
306 | while (ch != EOF && IS_WHITESPACE (ch)) | |
307 | ch = (*get) (); | |
308 | if (ch == '"') | |
309 | { | |
310 | (*unget) (ch); | |
9a7d824a | 311 | out_string = "\n.appfile "; |
6efd877d KR |
312 | old_state = 7; |
313 | state = -1; | |
314 | return *out_string++; | |
315 | } | |
316 | else | |
317 | { | |
318 | while (ch != EOF && ch != '\n') | |
319 | ch = (*get) (); | |
320 | return ch; | |
321 | } | |
322 | } | |
323 | ||
324 | case 5: | |
325 | ch = (*get) (); | |
326 | if (lex[ch] == LEX_IS_STRINGQUOTE) | |
327 | { | |
328 | state = old_state; | |
329 | return ch; | |
330 | } | |
331 | else if (ch == '\\') | |
332 | { | |
333 | state = 6; | |
334 | return ch; | |
335 | } | |
336 | else if (ch == EOF) | |
337 | { | |
338 | as_warn ("End of file in string: inserted '\"'"); | |
339 | state = old_state; | |
340 | (*unget) ('\n'); | |
341 | return '"'; | |
342 | } | |
343 | else | |
344 | { | |
345 | return ch; | |
346 | } | |
347 | ||
348 | case 6: | |
349 | state = 5; | |
350 | ch = (*get) (); | |
351 | switch (ch) | |
352 | { | |
6d331d71 KR |
353 | /* Handle strings broken across lines, by turning '\n' into |
354 | '\\' and 'n'. */ | |
6efd877d KR |
355 | case '\n': |
356 | (*unget) ('n'); | |
357 | add_newlines++; | |
358 | return '\\'; | |
359 | ||
360 | case '"': | |
361 | case '\\': | |
362 | case 'b': | |
363 | case 'f': | |
364 | case 'n': | |
365 | case 'r': | |
366 | case 't': | |
fecd2382 | 367 | #ifdef BACKSLASH_V |
6efd877d | 368 | case 'v': |
fecd2382 | 369 | #endif /* BACKSLASH_V */ |
6efd877d KR |
370 | case '0': |
371 | case '1': | |
372 | case '2': | |
373 | case '3': | |
374 | case '4': | |
375 | case '5': | |
376 | case '6': | |
377 | case '7': | |
378 | break; | |
7c2d4011 | 379 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
6efd877d KR |
380 | default: |
381 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
382 | break; | |
fecd2382 | 383 | #else /* ONLY_STANDARD_ESCAPES */ |
6efd877d KR |
384 | default: |
385 | /* Accept \x as x for any x */ | |
386 | break; | |
fecd2382 | 387 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 388 | |
6efd877d KR |
389 | case EOF: |
390 | as_warn ("End of file in string: '\"' inserted"); | |
391 | return '"'; | |
392 | } | |
393 | return ch; | |
394 | ||
395 | case 7: | |
396 | ch = (*get) (); | |
397 | state = 5; | |
398 | old_state = 8; | |
399 | return ch; | |
400 | ||
401 | case 8: | |
402 | do | |
403 | ch = (*get) (); | |
404 | while (ch != '\n'); | |
405 | state = 0; | |
406 | return ch; | |
407 | } | |
408 | ||
9a7d824a | 409 | /* OK, we are somewhere in states 0 through 4 or 9 through 10 */ |
6efd877d KR |
410 | |
411 | /* flushchar: */ | |
412 | ch = (*get) (); | |
413 | recycle: | |
414 | if (ch == EOF) | |
415 | { | |
416 | if (state != 0) | |
417 | as_warn ("End of file not at end of a line: Newline inserted."); | |
418 | return ch; | |
419 | } | |
420 | ||
421 | switch (lex[ch]) | |
422 | { | |
423 | case LEX_IS_WHITESPACE: | |
424 | do | |
425 | ch = (*get) (); | |
426 | while (ch != EOF && IS_WHITESPACE (ch)); | |
427 | if (ch == EOF) | |
428 | return ch; | |
429 | ||
430 | if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch)) | |
431 | { | |
432 | goto recycle; | |
fecd2382 | 433 | } |
7c2d4011 | 434 | #ifdef MRI |
6efd877d KR |
435 | (*unget) (ch); /* Put back */ |
436 | return ' '; /* Always return one space at start of line */ | |
7c2d4011 | 437 | #endif |
6efd877d KR |
438 | |
439 | /* If we're in state 2, we've seen a non-white | |
6d331d71 KR |
440 | character followed by whitespace. If the next |
441 | character is ':', this is whitespace after a label | |
442 | name which we can ignore. */ | |
6efd877d KR |
443 | if (state == 2 && lex[ch] == LEX_IS_COLON) |
444 | { | |
445 | state = 0; | |
446 | return ch; | |
447 | } | |
448 | ||
449 | switch (state) | |
450 | { | |
451 | case 0: | |
452 | state++; | |
453 | goto recycle; /* Punted leading sp */ | |
454 | case 1: | |
455 | BAD_CASE (state); /* We can't get here */ | |
456 | case 2: | |
f6a91cc0 | 457 | state = 3; |
6efd877d KR |
458 | (*unget) (ch); |
459 | return ' '; /* Sp after opco */ | |
460 | case 3: | |
461 | goto recycle; /* Sp in operands */ | |
9a7d824a ILT |
462 | case 9: |
463 | case 10: | |
464 | state = 10; /* Sp after symbol char */ | |
465 | goto recycle; | |
6efd877d KR |
466 | default: |
467 | BAD_CASE (state); | |
468 | } | |
469 | break; | |
470 | ||
471 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
472 | ch2 = (*get) (); | |
473 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) | |
474 | { | |
475 | for (;;) | |
476 | { | |
477 | do | |
478 | { | |
479 | ch2 = (*get) (); | |
480 | if (ch2 != EOF && IS_NEWLINE (ch2)) | |
481 | add_newlines++; | |
fecd2382 | 482 | } |
6efd877d KR |
483 | while (ch2 != EOF && |
484 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
485 | ||
486 | while (ch2 != EOF && | |
487 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
488 | { | |
489 | ch2 = (*get) (); | |
fecd2382 | 490 | } |
6efd877d KR |
491 | |
492 | if (ch2 == EOF | |
493 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
fecd2382 | 494 | break; |
6efd877d KR |
495 | (*unget) (ch); |
496 | } | |
497 | if (ch2 == EOF) | |
498 | as_warn ("End of file in multiline comment"); | |
499 | ||
500 | ch = ' '; | |
501 | goto recycle; | |
502 | } | |
503 | else | |
504 | { | |
505 | if (ch2 != EOF) | |
506 | (*unget) (ch2); | |
9a7d824a ILT |
507 | if (state == 9 || state == 10) |
508 | state = 3; | |
6efd877d KR |
509 | return ch; |
510 | } | |
511 | break; | |
512 | ||
513 | case LEX_IS_STRINGQUOTE: | |
9a7d824a ILT |
514 | if (state == 9 || state == 10) |
515 | old_state = 3; | |
516 | else | |
517 | old_state = state; | |
6efd877d KR |
518 | state = 5; |
519 | return ch; | |
520 | #ifndef MRI | |
a39116f1 | 521 | #ifndef IEEE_STYLE |
6efd877d KR |
522 | case LEX_IS_ONECHAR_QUOTE: |
523 | ch = (*get) (); | |
524 | if (ch == EOF) | |
525 | { | |
526 | as_warn ("End-of-file after a one-character quote; \\000 inserted"); | |
527 | ch = 0; | |
528 | } | |
529 | if (ch == '\\') | |
530 | { | |
531 | ch = (*get) (); | |
532 | ch = process_escape (ch); | |
533 | } | |
534 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
7c2d4011 | 535 | |
6efd877d | 536 | |
9a7d824a | 537 | /* None of these 'x constants for us. We want 'x'. */ |
6efd877d KR |
538 | if ((ch = (*get) ()) != '\'') |
539 | { | |
fecd2382 | 540 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
6efd877d | 541 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 542 | #else |
6efd877d | 543 | (*unget) (ch); |
fecd2382 | 544 | #endif |
6efd877d KR |
545 | } |
546 | if (strlen (out_buf) == 1) | |
547 | { | |
548 | return out_buf[0]; | |
549 | } | |
9a7d824a ILT |
550 | if (state == 9 || state == 10) |
551 | old_state = 3; | |
552 | else | |
553 | old_state = state; | |
6efd877d KR |
554 | state = -1; |
555 | out_string = out_buf; | |
556 | return *out_string++; | |
7c2d4011 | 557 | #endif |
a39116f1 | 558 | #endif |
6efd877d | 559 | case LEX_IS_COLON: |
9a7d824a ILT |
560 | if (state == 9 || state == 10) |
561 | state = 3; | |
562 | else if (state != 3) | |
6efd877d KR |
563 | state = 0; |
564 | return ch; | |
565 | ||
566 | case LEX_IS_NEWLINE: | |
567 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
568 | if (add_newlines) | |
569 | { | |
570 | --add_newlines; | |
571 | (*unget) (ch); | |
572 | } | |
573 | /* fall thru into... */ | |
574 | ||
575 | case LEX_IS_LINE_SEPARATOR: | |
576 | state = 0; | |
577 | return ch; | |
578 | ||
579 | case LEX_IS_LINE_COMMENT_START: | |
9a7d824a | 580 | if (state == 0) /* Only comment at start of line. */ |
6efd877d | 581 | { |
9a7d824a ILT |
582 | /* FIXME-someday: The two character comment stuff was badly |
583 | thought out. On i386, we want '/' as line comment start | |
584 | AND we want C style comments. hence this hack. The | |
585 | whole lexical process should be reworked. xoxorich. */ | |
586 | if (ch == '/') | |
f6a91cc0 | 587 | { |
9a7d824a ILT |
588 | ch2 = (*get) (); |
589 | if (ch2 == '*') | |
590 | { | |
591 | state = -2; | |
592 | return (do_scrub_next_char (get, unget)); | |
593 | } | |
594 | else | |
595 | { | |
596 | (*unget) (ch2); | |
597 | } | |
598 | } /* bad hack */ | |
6efd877d | 599 | |
9a7d824a | 600 | do |
6efd877d | 601 | ch = (*get) (); |
9a7d824a | 602 | while (ch != EOF && IS_WHITESPACE (ch)); |
6efd877d | 603 | if (ch == EOF) |
9a7d824a ILT |
604 | { |
605 | as_warn ("EOF in comment: Newline inserted"); | |
606 | return '\n'; | |
607 | } | |
608 | if (ch < '0' || ch > '9') | |
609 | { | |
610 | /* Non-numerics: Eat whole comment line */ | |
611 | while (ch != EOF && !IS_NEWLINE (ch)) | |
612 | ch = (*get) (); | |
613 | if (ch == EOF) | |
614 | as_warn ("EOF in Comment: Newline inserted"); | |
615 | state = 0; | |
616 | return '\n'; | |
617 | } | |
618 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
619 | (*unget) (ch); | |
620 | old_state = 4; | |
621 | state = -1; | |
622 | out_string = ".appline "; | |
623 | return *out_string++; | |
6efd877d | 624 | } |
6efd877d | 625 | |
9a7d824a ILT |
626 | /* We have a line comment character which is not at the start of |
627 | a line. If this is also a normal comment character, fall | |
628 | through. Otherwise treat it as a default character. */ | |
629 | if (strchr (comment_chars, ch) == NULL) | |
630 | goto de_fault; | |
631 | /* Fall through. */ | |
6efd877d KR |
632 | case LEX_IS_COMMENT_START: |
633 | do | |
634 | ch = (*get) (); | |
635 | while (ch != EOF && !IS_NEWLINE (ch)); | |
636 | if (ch == EOF) | |
637 | as_warn ("EOF in comment: Newline inserted"); | |
638 | state = 0; | |
639 | return '\n'; | |
640 | ||
f6a91cc0 | 641 | case LEX_IS_SYMBOL_COMPONENT: |
9a7d824a ILT |
642 | if (state == 10) |
643 | { | |
644 | /* This is a symbol character following another symbol | |
645 | character, with whitespace in between. We skipped the | |
646 | whitespace earlier, so output it now. */ | |
647 | (*unget) (ch); | |
648 | state = 3; | |
649 | return ' '; | |
650 | } | |
f6a91cc0 ILT |
651 | if (state == 3) |
652 | state = 9; | |
653 | /* Fall through. */ | |
6efd877d KR |
654 | default: |
655 | de_fault: | |
656 | /* Some relatively `normal' character. */ | |
657 | if (state == 0) | |
658 | { | |
659 | state = 2; /* Now seeing opcode */ | |
660 | return ch; | |
fecd2382 | 661 | } |
6efd877d KR |
662 | else if (state == 1) |
663 | { | |
664 | state = 2; /* Ditto */ | |
665 | return ch; | |
666 | } | |
f6a91cc0 ILT |
667 | else if (state == 9) |
668 | { | |
669 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
670 | state = 3; | |
671 | return ch; | |
672 | } | |
9a7d824a ILT |
673 | else if (state == 10) |
674 | { | |
675 | state = 3; | |
676 | return ch; | |
677 | } | |
6efd877d KR |
678 | else |
679 | { | |
680 | return ch; /* Opcode or operands already */ | |
681 | } | |
682 | } | |
683 | return -1; | |
fecd2382 RP |
684 | } |
685 | ||
686 | #ifdef TEST | |
687 | ||
6efd877d KR |
688 | const char comment_chars[] = "|"; |
689 | const char line_comment_chars[] = "#"; | |
fecd2382 | 690 | |
6efd877d | 691 | main () |
fecd2382 | 692 | { |
6efd877d KR |
693 | int ch; |
694 | ||
695 | app_begin (); | |
696 | while ((ch = do_scrub_next_char (stdin)) != EOF) | |
697 | putc (ch, stdout); | |
fecd2382 RP |
698 | } |
699 | ||
6efd877d KR |
/* Minimal stand-in for the assembler's warning routine, used only by
   the TEST driver: write STR followed by a newline to stderr.  Any
   additional arguments a caller passes are ignored.  */
as_warn (str)
     char *str;
{
  fprintf (stderr, "%s\n", str);
}
6efd877d | 706 | |
fecd2382 RP |
707 | #endif |
708 | ||
fecd2382 | 709 | /* end of app.c */ |