Commit | Line | Data |
---|---|---|
fecd2382 | 1 | /* This is the Assembler Pre-Processor |
58d4951d | 2 | Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc. |
6efd877d | 3 | |
a39116f1 | 4 | This file is part of GAS, the GNU Assembler. |
6efd877d | 5 | |
a39116f1 RP |
6 | GAS is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
6efd877d | 10 | |
a39116f1 RP |
11 | GAS is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
6efd877d | 15 | |
a39116f1 RP |
16 | You should have received a copy of the GNU General Public License |
17 | along with GAS; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
fecd2382 | 19 | |
58d4951d | 20 | /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ |
fecd2382 RP |
21 | /* App, the assembler pre-processor. This pre-processor strips out excess |
22 | spaces, turns single-quoted characters into a decimal constant, and turns | |
9a7d824a | 23 | # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
be06bdcd | 24 | pair. This needs better error-handling. |
a39116f1 | 25 | */ |
fecd2382 RP |
26 | |
27 | #include <stdio.h> | |
6efd877d | 28 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 29 | |
3340f7e5 | 30 | #if (__STDC__ != 1) && !defined(const) |
6efd877d | 31 | #define const /* Nothing */ |
fecd2382 RP |
32 | #endif |
33 | ||
/* Character classification table, indexed by character code.  Every
   entry starts out as zero (no class); do_scrub_begin() stores one of
   the LEX_IS_* values below for each character it knows about.  */
static char lex[256];

/* Characters that may appear inside a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11

/* Class predicates.  The argument is parenthesized and reduced to
   unsigned char before it is used as an index, so a plain (possibly
   signed) char holding a value above 127 selects its own table entry
   instead of reading memory in front of lex[].  Callers must still
   rule out EOF themselves before asking about a character.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
55 | ||
385ce433 JL |
56 | static int process_escape PARAMS ((int)); |
57 | ||
a39116f1 RP |
58 | /* FIXME-soon: The entire lexer/parser thingy should be |
59 | built statically at compile time rather than dynamically | |
60 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 61 | |
6efd877d KR |
62 | void |
63 | do_scrub_begin () | |
64 | { | |
65 | const char *p; | |
66 | ||
67 | lex[' '] = LEX_IS_WHITESPACE; | |
68 | lex['\t'] = LEX_IS_WHITESPACE; | |
69 | lex['\n'] = LEX_IS_NEWLINE; | |
70 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
71 | lex['"'] = LEX_IS_STRINGQUOTE; | |
58d4951d | 72 | #ifndef TC_HPPA |
6efd877d | 73 | lex['\''] = LEX_IS_ONECHAR_QUOTE; |
58d4951d | 74 | #endif |
6efd877d | 75 | lex[':'] = LEX_IS_COLON; |
7c2d4011 | 76 | |
be06bdcd SC |
77 | |
78 | ||
79 | #ifdef SINGLE_QUOTE_STRINGS | |
80 | lex['\''] = LEX_IS_STRINGQUOTE; | |
7c2d4011 | 81 | #endif |
be06bdcd | 82 | |
6efd877d | 83 | /* Note that these override the previous defaults, e.g. if ';' |
be06bdcd | 84 | |
fecd2382 | 85 | is a comment char, then it isn't a line separator. */ |
6efd877d KR |
86 | for (p = symbol_chars; *p; ++p) |
87 | { | |
58d4951d | 88 | lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; |
6efd877d KR |
89 | } /* declare symbol characters */ |
90 | ||
6efd877d KR |
91 | for (p = comment_chars; *p; p++) |
92 | { | |
58d4951d | 93 | lex[(unsigned char) *p] = LEX_IS_COMMENT_START; |
6efd877d KR |
94 | } /* declare comment chars */ |
95 | ||
9a7d824a ILT |
96 | for (p = line_comment_chars; *p; p++) |
97 | { | |
58d4951d | 98 | lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; |
9a7d824a ILT |
99 | } /* declare line comment chars */ |
100 | ||
6efd877d KR |
101 | for (p = line_separator_chars; *p; p++) |
102 | { | |
58d4951d | 103 | lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; |
6efd877d KR |
104 | } /* declare line separators */ |
105 | ||
106 | /* Only allow slash-star comments if slash is not in use */ | |
107 | if (lex['/'] == 0) | |
108 | { | |
109 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
110 | } | |
111 | /* FIXME-soon. This is a bad hack but otherwise, we | |
a39116f1 RP |
112 | can't do c-style comments when '/' is a line |
113 | comment char. xoxorich. */ | |
6efd877d KR |
114 | if (lex['*'] == 0) |
115 | { | |
116 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
117 | } | |
118 | } /* do_scrub_begin() */ | |
fecd2382 RP |
119 | |
/* Stream used by scrub_from_file() and scrub_to_file().  */
FILE *scrub_file;

/* Input callback for do_scrub_next_char(): read one character from
   scrub_file and return it (getc's result, so EOF at end of input).  */
int
scrub_from_file ()
{
  int next_char;

  next_char = getc (scrub_file);
  return next_char;
}
127 | ||
6efd877d KR |
128 | void |
129 | scrub_to_file (ch) | |
130 | int ch; | |
fecd2382 | 131 | { |
6efd877d KR |
132 | ungetc (ch, scrub_file); |
133 | } /* scrub_to_file() */ | |
fecd2382 RP |
134 | |
/* In-memory input for the scrubber: scrub_string is the next character
   to deliver, scrub_last_string is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* Input callback for do_scrub_next_char(): return the next character
   of the in-memory input and advance, or EOF once scrub_string has
   reached scrub_last_string.

   The character is returned as an unsigned char value.  Returning the
   plain char would sign-extend on hosts where char is signed, so a
   0xFF byte would be indistinguishable from EOF and any byte above
   127 would become a negative index into lex[].  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */
fecd2382 | 143 | |
6efd877d KR |
144 | void |
145 | scrub_to_string (ch) | |
146 | int ch; | |
fecd2382 | 147 | { |
6efd877d KR |
148 | *--scrub_string = ch; |
149 | } /* scrub_to_string() */ | |
fecd2382 RP |
150 | |
/* Scrubber state that persists between calls to do_scrub_next_char().  */
static int state;		/* Current state of the state machine.  */
static int old_state;		/* State to resume after a temporary one.  */
static char *out_string;	/* Pending text emitted while in state -1.  */
static char out_buf[20];	/* Holds the decimal text of a 'x constant.  */
static int add_newlines = 0;	/* Newlines swallowed and still owed.  */

/* Everything app_push() has to remember and app_pop() has to restore
   when an include file interrupts the current input.  App runs
   asynchronously to the parsing of the .include directives, so the
   scrubber state at the moment a .include is interpreted has nothing
   to do with that directive; hence all of it is saved, together with
   the current input source.  */

struct app_save
  {
    /* Copies of the file-scope scrubber variables above.  */
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];
    int add_newlines;

    /* Copies of the current input source.  */
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };
174 | ||
175 | char * | |
176 | app_push () | |
177 | { | |
7c2d4011 SC |
178 | register struct app_save *saved; |
179 | ||
6efd877d KR |
180 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
181 | saved->state = state; | |
182 | saved->old_state = old_state; | |
183 | saved->out_string = out_string; | |
58d4951d | 184 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); |
6efd877d KR |
185 | saved->add_newlines = add_newlines; |
186 | saved->scrub_string = scrub_string; | |
7c2d4011 | 187 | saved->scrub_last_string = scrub_last_string; |
6efd877d | 188 | saved->scrub_file = scrub_file; |
7c2d4011 SC |
189 | |
190 | /* do_scrub_begin() is not useful, just wastes time. */ | |
6efd877d | 191 | return (char *) saved; |
fecd2382 RP |
192 | } |
193 | ||
6efd877d KR |
194 | void |
195 | app_pop (arg) | |
196 | char *arg; | |
fecd2382 | 197 | { |
6efd877d KR |
198 | register struct app_save *saved = (struct app_save *) arg; |
199 | ||
200 | /* There is no do_scrub_end (). */ | |
201 | state = saved->state; | |
202 | old_state = saved->old_state; | |
203 | out_string = saved->out_string; | |
58d4951d | 204 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d KR |
205 | add_newlines = saved->add_newlines; |
206 | scrub_string = saved->scrub_string; | |
207 | scrub_last_string = saved->scrub_last_string; | |
208 | scrub_file = saved->scrub_file; | |
209 | ||
210 | free (arg); | |
211 | } /* app_pop() */ | |
212 | ||
/* Translate CH, the character that followed a backslash, into the
   character the escape sequence stands for.  Characters without a
   special meaning are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  /* escape_letter[i] maps to escape_value[i].  */
  static const char escape_letter[] = {'b', 'f', 'n', 'r', 't', '\'', '"'};
  static const char escape_value[] = {'\b', '\f', '\n', '\r', '\t', '\'', '\"'};
  unsigned int i;

  for (i = 0; i < sizeof (escape_letter); i++)
    {
      if (ch == escape_letter[i])
	return escape_value[i];
    }

  return ch;
}
239 | int |
240 | do_scrub_next_char (get, unget) | |
241 | int (*get) (); | |
242 | void (*unget) (); | |
fecd2382 | 243 | { |
6efd877d | 244 | /*State 0: beginning of normal line |
a39116f1 RP |
245 | 1: After first whitespace on line (flush more white) |
246 | 2: After first non-white (opcode) on line (keep 1white) | |
247 | 3: after second white on line (into operands) (flush white) | |
248 | 4: after putting out a .line, put out digits | |
249 | 5: parsing a string, then go to old-state | |
250 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
251 | 7: After putting out a .appfile, put out string. |
252 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 253 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 254 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a39116f1 RP |
255 | -1: output string in out_string and go to the state in old_state |
256 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
257 | */ | |
6efd877d | 258 | |
9a7d824a ILT |
259 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
260 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
261 | 120''. States 9 and 10 ensure that a space is never dropped in | |
262 | between characters which could appear in a identifier. Ian | |
263 | Taylor, ian@cygnus.com. */ | |
f6a91cc0 | 264 | |
6efd877d | 265 | register int ch, ch2 = 0; |
385ce433 | 266 | int not_cpp_line = 0; |
6efd877d KR |
267 | |
268 | switch (state) | |
269 | { | |
270 | case -1: | |
271 | ch = *out_string++; | |
272 | if (*out_string == 0) | |
273 | { | |
274 | state = old_state; | |
275 | old_state = 3; | |
276 | } | |
277 | return ch; | |
278 | ||
279 | case -2: | |
280 | for (;;) | |
281 | { | |
282 | do | |
283 | { | |
284 | ch = (*get) (); | |
285 | } | |
286 | while (ch != EOF && ch != '\n' && ch != '*'); | |
287 | if (ch == '\n' || ch == EOF) | |
288 | return ch; | |
289 | ||
290 | /* At this point, ch must be a '*' */ | |
291 | while ((ch = (*get) ()) == '*') | |
292 | { | |
293 | ; | |
294 | } | |
295 | if (ch == EOF || ch == '/') | |
296 | break; | |
297 | (*unget) (ch); | |
298 | } | |
299 | state = old_state; | |
300 | return ' '; | |
301 | ||
302 | case 4: | |
303 | ch = (*get) (); | |
304 | if (ch == EOF || (ch >= '0' && ch <= '9')) | |
305 | return ch; | |
306 | else | |
307 | { | |
308 | while (ch != EOF && IS_WHITESPACE (ch)) | |
309 | ch = (*get) (); | |
310 | if (ch == '"') | |
311 | { | |
312 | (*unget) (ch); | |
9a7d824a | 313 | out_string = "\n.appfile "; |
6efd877d KR |
314 | old_state = 7; |
315 | state = -1; | |
316 | return *out_string++; | |
317 | } | |
318 | else | |
319 | { | |
320 | while (ch != EOF && ch != '\n') | |
321 | ch = (*get) (); | |
58d4951d | 322 | state = 0; |
6efd877d KR |
323 | return ch; |
324 | } | |
325 | } | |
326 | ||
327 | case 5: | |
328 | ch = (*get) (); | |
329 | if (lex[ch] == LEX_IS_STRINGQUOTE) | |
330 | { | |
331 | state = old_state; | |
332 | return ch; | |
333 | } | |
334 | else if (ch == '\\') | |
335 | { | |
336 | state = 6; | |
337 | return ch; | |
338 | } | |
339 | else if (ch == EOF) | |
340 | { | |
341 | as_warn ("End of file in string: inserted '\"'"); | |
342 | state = old_state; | |
343 | (*unget) ('\n'); | |
344 | return '"'; | |
345 | } | |
346 | else | |
347 | { | |
348 | return ch; | |
349 | } | |
350 | ||
351 | case 6: | |
352 | state = 5; | |
353 | ch = (*get) (); | |
354 | switch (ch) | |
355 | { | |
6d331d71 KR |
356 | /* Handle strings broken across lines, by turning '\n' into |
357 | '\\' and 'n'. */ | |
6efd877d KR |
358 | case '\n': |
359 | (*unget) ('n'); | |
360 | add_newlines++; | |
361 | return '\\'; | |
362 | ||
363 | case '"': | |
364 | case '\\': | |
365 | case 'b': | |
366 | case 'f': | |
367 | case 'n': | |
368 | case 'r': | |
369 | case 't': | |
fecd2382 | 370 | #ifdef BACKSLASH_V |
6efd877d | 371 | case 'v': |
fecd2382 | 372 | #endif /* BACKSLASH_V */ |
385ce433 JL |
373 | case 'x': |
374 | case 'X': | |
6efd877d KR |
375 | case '0': |
376 | case '1': | |
377 | case '2': | |
378 | case '3': | |
379 | case '4': | |
380 | case '5': | |
381 | case '6': | |
382 | case '7': | |
383 | break; | |
7c2d4011 | 384 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
6efd877d KR |
385 | default: |
386 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
387 | break; | |
fecd2382 | 388 | #else /* ONLY_STANDARD_ESCAPES */ |
6efd877d KR |
389 | default: |
390 | /* Accept \x as x for any x */ | |
391 | break; | |
fecd2382 | 392 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 393 | |
6efd877d KR |
394 | case EOF: |
395 | as_warn ("End of file in string: '\"' inserted"); | |
396 | return '"'; | |
397 | } | |
398 | return ch; | |
399 | ||
400 | case 7: | |
401 | ch = (*get) (); | |
402 | state = 5; | |
403 | old_state = 8; | |
404 | return ch; | |
405 | ||
406 | case 8: | |
407 | do | |
408 | ch = (*get) (); | |
409 | while (ch != '\n'); | |
410 | state = 0; | |
411 | return ch; | |
412 | } | |
413 | ||
9a7d824a | 414 | /* OK, we are somewhere in states 0 through 4 or 9 through 10 */ |
6efd877d KR |
415 | |
416 | /* flushchar: */ | |
417 | ch = (*get) (); | |
418 | recycle: | |
419 | if (ch == EOF) | |
420 | { | |
421 | if (state != 0) | |
422 | as_warn ("End of file not at end of a line: Newline inserted."); | |
423 | return ch; | |
424 | } | |
425 | ||
426 | switch (lex[ch]) | |
427 | { | |
428 | case LEX_IS_WHITESPACE: | |
429 | do | |
385ce433 JL |
430 | /* Preserve a single whitespace character at the beginning of |
431 | a line. */ | |
432 | if (state == 0) | |
433 | { | |
434 | state = 1; | |
435 | return ch; | |
436 | } | |
437 | else | |
438 | ch = (*get) (); | |
6efd877d KR |
439 | while (ch != EOF && IS_WHITESPACE (ch)); |
440 | if (ch == EOF) | |
441 | return ch; | |
442 | ||
443 | if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch)) | |
444 | { | |
385ce433 JL |
445 | /* cpp never outputs a leading space before the #, so try to |
446 | avoid being confused. */ | |
447 | not_cpp_line = 1; | |
6efd877d | 448 | goto recycle; |
fecd2382 | 449 | } |
7c2d4011 | 450 | #ifdef MRI |
6efd877d KR |
451 | (*unget) (ch); /* Put back */ |
452 | return ' '; /* Always return one space at start of line */ | |
7c2d4011 | 453 | #endif |
6efd877d KR |
454 | |
455 | /* If we're in state 2, we've seen a non-white | |
6d331d71 KR |
456 | character followed by whitespace. If the next |
457 | character is ':', this is whitespace after a label | |
458 | name which we can ignore. */ | |
6efd877d KR |
459 | if (state == 2 && lex[ch] == LEX_IS_COLON) |
460 | { | |
461 | state = 0; | |
462 | return ch; | |
463 | } | |
464 | ||
465 | switch (state) | |
466 | { | |
467 | case 0: | |
468 | state++; | |
469 | goto recycle; /* Punted leading sp */ | |
470 | case 1: | |
385ce433 JL |
471 | /* We can arrive here if we leave a leading whitespace character |
472 | at the beginning of a line. */ | |
473 | goto recycle; | |
6efd877d | 474 | case 2: |
f6a91cc0 | 475 | state = 3; |
6efd877d KR |
476 | (*unget) (ch); |
477 | return ' '; /* Sp after opco */ | |
478 | case 3: | |
479 | goto recycle; /* Sp in operands */ | |
9a7d824a ILT |
480 | case 9: |
481 | case 10: | |
482 | state = 10; /* Sp after symbol char */ | |
483 | goto recycle; | |
6efd877d KR |
484 | default: |
485 | BAD_CASE (state); | |
486 | } | |
487 | break; | |
488 | ||
489 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
490 | ch2 = (*get) (); | |
491 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) | |
492 | { | |
493 | for (;;) | |
494 | { | |
495 | do | |
496 | { | |
497 | ch2 = (*get) (); | |
498 | if (ch2 != EOF && IS_NEWLINE (ch2)) | |
499 | add_newlines++; | |
fecd2382 | 500 | } |
6efd877d KR |
501 | while (ch2 != EOF && |
502 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
503 | ||
504 | while (ch2 != EOF && | |
505 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
506 | { | |
507 | ch2 = (*get) (); | |
fecd2382 | 508 | } |
6efd877d KR |
509 | |
510 | if (ch2 == EOF | |
511 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
fecd2382 | 512 | break; |
6efd877d KR |
513 | (*unget) (ch); |
514 | } | |
515 | if (ch2 == EOF) | |
516 | as_warn ("End of file in multiline comment"); | |
517 | ||
518 | ch = ' '; | |
519 | goto recycle; | |
520 | } | |
521 | else | |
522 | { | |
523 | if (ch2 != EOF) | |
524 | (*unget) (ch2); | |
9a7d824a ILT |
525 | if (state == 9 || state == 10) |
526 | state = 3; | |
6efd877d KR |
527 | return ch; |
528 | } | |
529 | break; | |
530 | ||
531 | case LEX_IS_STRINGQUOTE: | |
9a7d824a ILT |
532 | if (state == 9 || state == 10) |
533 | old_state = 3; | |
534 | else | |
535 | old_state = state; | |
6efd877d KR |
536 | state = 5; |
537 | return ch; | |
538 | #ifndef MRI | |
a39116f1 | 539 | #ifndef IEEE_STYLE |
6efd877d KR |
540 | case LEX_IS_ONECHAR_QUOTE: |
541 | ch = (*get) (); | |
542 | if (ch == EOF) | |
543 | { | |
544 | as_warn ("End-of-file after a one-character quote; \\000 inserted"); | |
545 | ch = 0; | |
546 | } | |
547 | if (ch == '\\') | |
548 | { | |
549 | ch = (*get) (); | |
550 | ch = process_escape (ch); | |
551 | } | |
552 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
7c2d4011 | 553 | |
6efd877d | 554 | |
9a7d824a | 555 | /* None of these 'x constants for us. We want 'x'. */ |
6efd877d KR |
556 | if ((ch = (*get) ()) != '\'') |
557 | { | |
fecd2382 | 558 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
6efd877d | 559 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 560 | #else |
6efd877d | 561 | (*unget) (ch); |
fecd2382 | 562 | #endif |
6efd877d KR |
563 | } |
564 | if (strlen (out_buf) == 1) | |
565 | { | |
566 | return out_buf[0]; | |
567 | } | |
9a7d824a ILT |
568 | if (state == 9 || state == 10) |
569 | old_state = 3; | |
570 | else | |
571 | old_state = state; | |
6efd877d KR |
572 | state = -1; |
573 | out_string = out_buf; | |
574 | return *out_string++; | |
7c2d4011 | 575 | #endif |
a39116f1 | 576 | #endif |
6efd877d | 577 | case LEX_IS_COLON: |
9a7d824a ILT |
578 | if (state == 9 || state == 10) |
579 | state = 3; | |
580 | else if (state != 3) | |
6efd877d KR |
581 | state = 0; |
582 | return ch; | |
583 | ||
584 | case LEX_IS_NEWLINE: | |
585 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
586 | if (add_newlines) | |
587 | { | |
588 | --add_newlines; | |
589 | (*unget) (ch); | |
590 | } | |
591 | /* fall thru into... */ | |
592 | ||
593 | case LEX_IS_LINE_SEPARATOR: | |
594 | state = 0; | |
595 | return ch; | |
596 | ||
597 | case LEX_IS_LINE_COMMENT_START: | |
9a7d824a | 598 | if (state == 0) /* Only comment at start of line. */ |
6efd877d | 599 | { |
9a7d824a ILT |
600 | /* FIXME-someday: The two character comment stuff was badly |
601 | thought out. On i386, we want '/' as line comment start | |
602 | AND we want C style comments. hence this hack. The | |
603 | whole lexical process should be reworked. xoxorich. */ | |
604 | if (ch == '/') | |
f6a91cc0 | 605 | { |
9a7d824a ILT |
606 | ch2 = (*get) (); |
607 | if (ch2 == '*') | |
608 | { | |
609 | state = -2; | |
610 | return (do_scrub_next_char (get, unget)); | |
611 | } | |
612 | else | |
613 | { | |
614 | (*unget) (ch2); | |
615 | } | |
616 | } /* bad hack */ | |
6efd877d | 617 | |
385ce433 JL |
618 | if (ch != '#') |
619 | not_cpp_line = 1; | |
620 | ||
9a7d824a | 621 | do |
6efd877d | 622 | ch = (*get) (); |
9a7d824a | 623 | while (ch != EOF && IS_WHITESPACE (ch)); |
6efd877d | 624 | if (ch == EOF) |
9a7d824a ILT |
625 | { |
626 | as_warn ("EOF in comment: Newline inserted"); | |
627 | return '\n'; | |
628 | } | |
385ce433 | 629 | if (ch < '0' || ch > '9' || not_cpp_line) |
9a7d824a ILT |
630 | { |
631 | /* Non-numerics: Eat whole comment line */ | |
632 | while (ch != EOF && !IS_NEWLINE (ch)) | |
633 | ch = (*get) (); | |
634 | if (ch == EOF) | |
635 | as_warn ("EOF in Comment: Newline inserted"); | |
636 | state = 0; | |
637 | return '\n'; | |
638 | } | |
639 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
640 | (*unget) (ch); | |
641 | old_state = 4; | |
642 | state = -1; | |
643 | out_string = ".appline "; | |
644 | return *out_string++; | |
6efd877d | 645 | } |
6efd877d | 646 | |
9a7d824a ILT |
647 | /* We have a line comment character which is not at the start of |
648 | a line. If this is also a normal comment character, fall | |
649 | through. Otherwise treat it as a default character. */ | |
650 | if (strchr (comment_chars, ch) == NULL) | |
651 | goto de_fault; | |
652 | /* Fall through. */ | |
6efd877d KR |
653 | case LEX_IS_COMMENT_START: |
654 | do | |
655 | ch = (*get) (); | |
656 | while (ch != EOF && !IS_NEWLINE (ch)); | |
657 | if (ch == EOF) | |
658 | as_warn ("EOF in comment: Newline inserted"); | |
659 | state = 0; | |
660 | return '\n'; | |
661 | ||
f6a91cc0 | 662 | case LEX_IS_SYMBOL_COMPONENT: |
9a7d824a ILT |
663 | if (state == 10) |
664 | { | |
665 | /* This is a symbol character following another symbol | |
666 | character, with whitespace in between. We skipped the | |
667 | whitespace earlier, so output it now. */ | |
668 | (*unget) (ch); | |
669 | state = 3; | |
670 | return ' '; | |
671 | } | |
f6a91cc0 ILT |
672 | if (state == 3) |
673 | state = 9; | |
674 | /* Fall through. */ | |
6efd877d KR |
675 | default: |
676 | de_fault: | |
677 | /* Some relatively `normal' character. */ | |
678 | if (state == 0) | |
679 | { | |
680 | state = 2; /* Now seeing opcode */ | |
681 | return ch; | |
fecd2382 | 682 | } |
6efd877d KR |
683 | else if (state == 1) |
684 | { | |
685 | state = 2; /* Ditto */ | |
686 | return ch; | |
687 | } | |
f6a91cc0 ILT |
688 | else if (state == 9) |
689 | { | |
690 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
691 | state = 3; | |
692 | return ch; | |
693 | } | |
9a7d824a ILT |
694 | else if (state == 10) |
695 | { | |
696 | state = 3; | |
697 | return ch; | |
698 | } | |
6efd877d KR |
699 | else |
700 | { | |
701 | return ch; /* Opcode or operands already */ | |
702 | } | |
703 | } | |
704 | return -1; | |
fecd2382 RP |
705 | } |
706 | ||
#ifdef TEST

/* Stand-alone test harness: with TEST defined this file becomes a
   filter that scrubs standard input and writes the result to standard
   output.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin() also walks line_separator_chars, so the harness has
   to supply it.  ';' is a line separator by default, so no extra
   characters are listed.
   NOTE(review): assumes as.h declares this the same way as the two
   arrays above -- confirm.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Build the lex table, then scrub stdin through the file-based
     get/unget callbacks.  */
  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal replacement for the assembler's warning routine: print STR
   and a newline on stderr.  Any further arguments a caller passes are
   ignored, so a format such as %c is printed literally.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
729 | ||
fecd2382 | 730 | /* end of app.c */ |