Commit | Line | Data |
---|---|---|
fecd2382 | 1 | /* This is the Assembler Pre-Processor |
5a051773 | 2 | Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc. |
6efd877d | 3 | |
a39116f1 | 4 | This file is part of GAS, the GNU Assembler. |
6efd877d | 5 | |
a39116f1 RP |
6 | GAS is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
6efd877d | 10 | |
a39116f1 RP |
11 | GAS is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
6efd877d | 15 | |
a39116f1 RP |
16 | You should have received a copy of the GNU General Public License |
17 | along with GAS; see the file COPYING. If not, write to | |
a2a5a4fa | 18 | the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
fecd2382 | 19 | |
58d4951d | 20 | /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ |
fecd2382 RP |
21 | /* App, the assembler pre-processor. This pre-processor strips out excess |
22 | spaces, turns single-quoted characters into a decimal constant, and turns | |
9a7d824a | 23 | # <number> <filename> <garbage> into a .line <number>\n.file <filename> |
a2a5a4fa | 24 | pair. This needs better error-handling. */ |
fecd2382 RP |
25 | |
26 | #include <stdio.h> | |
6efd877d | 27 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 28 | |
5a051773 SS |
29 | #if (__STDC__ != 1) |
30 | #ifndef const | |
31 | #define const /* empty */ | |
32 | #endif | |
fecd2382 RP |
33 | #endif |
34 | ||
/* Character classification table, indexed by character code.  An entry
   of 0 means the character has no special class; otherwise it holds one
   of the LEX_IS_* values below.  Filled in by do_scrub_begin ().  */
static char lex[256];

/* Characters that may appear inside a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_TWOCHAR_COMMENT_2ND      7
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11

/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be a valid index (in particular, not EOF).  */
#define IS_SYMBOL_COMPONENT(c)  (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)        (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)    (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)           (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)      (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)           (lex[c] == LEX_IS_NEWLINE)
56 | ||
385ce433 JL |
57 | static int process_escape PARAMS ((int)); |
58 | ||
a39116f1 RP |
59 | /* FIXME-soon: The entire lexer/parser thingy should be |
60 | built statically at compile time rather than dynamically | |
61 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 62 | |
6efd877d KR |
63 | void |
64 | do_scrub_begin () | |
65 | { | |
66 | const char *p; | |
67 | ||
68 | lex[' '] = LEX_IS_WHITESPACE; | |
69 | lex['\t'] = LEX_IS_WHITESPACE; | |
70 | lex['\n'] = LEX_IS_NEWLINE; | |
71 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
6efd877d | 72 | lex[':'] = LEX_IS_COLON; |
7c2d4011 | 73 | |
7172e226 ILT |
74 | if (! flag_mri) |
75 | { | |
76 | lex['"'] = LEX_IS_STRINGQUOTE; | |
be06bdcd | 77 | |
7172e226 ILT |
78 | #ifndef TC_HPPA |
79 | lex['\''] = LEX_IS_ONECHAR_QUOTE; | |
80 | #endif | |
be06bdcd SC |
81 | |
82 | #ifdef SINGLE_QUOTE_STRINGS | |
7172e226 | 83 | lex['\''] = LEX_IS_STRINGQUOTE; |
7c2d4011 | 84 | #endif |
7172e226 | 85 | } |
be06bdcd | 86 | |
a2a5a4fa KR |
87 | /* Note that these override the previous defaults, e.g. if ';' is a |
88 | comment char, then it isn't a line separator. */ | |
6efd877d KR |
89 | for (p = symbol_chars; *p; ++p) |
90 | { | |
58d4951d | 91 | lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; |
6efd877d KR |
92 | } /* declare symbol characters */ |
93 | ||
6efd877d KR |
94 | for (p = comment_chars; *p; p++) |
95 | { | |
58d4951d | 96 | lex[(unsigned char) *p] = LEX_IS_COMMENT_START; |
6efd877d KR |
97 | } /* declare comment chars */ |
98 | ||
9a7d824a ILT |
99 | for (p = line_comment_chars; *p; p++) |
100 | { | |
58d4951d | 101 | lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; |
9a7d824a ILT |
102 | } /* declare line comment chars */ |
103 | ||
6efd877d KR |
104 | for (p = line_separator_chars; *p; p++) |
105 | { | |
58d4951d | 106 | lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; |
6efd877d KR |
107 | } /* declare line separators */ |
108 | ||
109 | /* Only allow slash-star comments if slash is not in use */ | |
110 | if (lex['/'] == 0) | |
111 | { | |
112 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
113 | } | |
a2a5a4fa KR |
114 | /* FIXME-soon. This is a bad hack but otherwise, we can't do |
115 | c-style comments when '/' is a line comment char. xoxorich. */ | |
6efd877d KR |
116 | if (lex['*'] == 0) |
117 | { | |
118 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
119 | } | |
7172e226 ILT |
120 | |
121 | if (flag_mri) | |
122 | { | |
123 | lex['\''] = LEX_IS_STRINGQUOTE; | |
124 | lex[';'] = LEX_IS_COMMENT_START; | |
125 | lex['*'] = LEX_IS_LINE_COMMENT_START; | |
126 | /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but | |
127 | then it can't be used in an expression. */ | |
128 | lex['!'] = LEX_IS_LINE_COMMENT_START; | |
129 | } | |
6efd877d | 130 | } /* do_scrub_begin() */ |
fecd2382 RP |
131 | |
/* Stream that scrub_from_file () reads and scrub_to_file () pushes
   back onto.  */
FILE *scrub_file;

/* "get" routine for do_scrub_next_char () when the input is a stdio
   stream: return the next character of scrub_file, or EOF.  */
int
scrub_from_file ()
{
  int next;

  next = getc (scrub_file);
  return next;
}
139 | ||
6efd877d KR |
140 | void |
141 | scrub_to_file (ch) | |
142 | int ch; | |
fecd2382 | 143 | { |
6efd877d KR |
144 | ungetc (ch, scrub_file); |
145 | } /* scrub_to_file() */ | |
fecd2382 RP |
146 | |
/* In-memory input for the scrubber: scrub_string is the read position
   and scrub_last_string marks the end of the input (one past the last
   character to be returned).  */
char *scrub_string;
char *scrub_last_string;

/* "get" routine for do_scrub_next_char () when the input is a string:
   return the next character, or EOF once scrub_string has reached
   scrub_last_string.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  /* Convert through unsigned char, as getc () does.  Where plain char
     is signed, a byte >= 0x80 would otherwise come back negative: 0xFF
     would be indistinguishable from EOF, and the others would be used
     as negative indices into lex[].  */
  return (unsigned char) *scrub_string++;
}                               /* scrub_from_string() */
fecd2382 | 155 | |
6efd877d KR |
156 | void |
157 | scrub_to_string (ch) | |
158 | int ch; | |
fecd2382 | 159 | { |
6efd877d KR |
160 | *--scrub_string = ch; |
161 | } /* scrub_to_string() */ | |
fecd2382 RP |
162 | |
/* Saved state of the scrubber.  */

/* Current state of the do_scrub_next_char () state machine.  */
static int state;
/* State to resume once a temporary state (-1, -2 or 5) is finished.  */
static int old_state;
/* Next character of the pending text emitted in state -1.  */
static char *out_string;
/* Scratch buffer for the decimal text of a one-character constant.  */
static char out_buf[20];
/* Newlines swallowed inside comments and continued strings which are
   still owed to the output.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
{
  /* Copies of the file-level scrubber variables of the same names.  */
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof out_buf];
  int add_newlines;
  /* Copies of the input-source globals.  */
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
186 | ||
187 | char * | |
188 | app_push () | |
189 | { | |
7c2d4011 SC |
190 | register struct app_save *saved; |
191 | ||
6efd877d KR |
192 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
193 | saved->state = state; | |
194 | saved->old_state = old_state; | |
195 | saved->out_string = out_string; | |
58d4951d | 196 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); |
6efd877d KR |
197 | saved->add_newlines = add_newlines; |
198 | saved->scrub_string = scrub_string; | |
7c2d4011 | 199 | saved->scrub_last_string = scrub_last_string; |
6efd877d | 200 | saved->scrub_file = scrub_file; |
7c2d4011 SC |
201 | |
202 | /* do_scrub_begin() is not useful, just wastes time. */ | |
6efd877d | 203 | return (char *) saved; |
fecd2382 RP |
204 | } |
205 | ||
6efd877d KR |
206 | void |
207 | app_pop (arg) | |
208 | char *arg; | |
fecd2382 | 209 | { |
6efd877d KR |
210 | register struct app_save *saved = (struct app_save *) arg; |
211 | ||
212 | /* There is no do_scrub_end (). */ | |
213 | state = saved->state; | |
214 | old_state = saved->old_state; | |
215 | out_string = saved->out_string; | |
58d4951d | 216 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d KR |
217 | add_newlines = saved->add_newlines; |
218 | scrub_string = saved->scrub_string; | |
219 | scrub_last_string = saved->scrub_last_string; | |
220 | scrub_file = saved->scrub_file; | |
221 | ||
222 | free (arg); | |
223 | } /* app_pop() */ | |
224 | ||
/* Return the character denoted by a backslash followed by CH.  The
   letters b, f, n, r and t yield the corresponding control character;
   every other value (including quote characters) is returned as is.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  /* letters[i] names the control character values[i].  */
  static const char letters[] = "bfnrt";
  static const char values[] = "\b\f\n\r\t";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    if (ch == letters[i])
      return values[i];

  return ch;
}
6efd877d KR |
251 | int |
252 | do_scrub_next_char (get, unget) | |
253 | int (*get) (); | |
254 | void (*unget) (); | |
fecd2382 | 255 | { |
6efd877d | 256 | /*State 0: beginning of normal line |
a39116f1 RP |
257 | 1: After first whitespace on line (flush more white) |
258 | 2: After first non-white (opcode) on line (keep 1white) | |
259 | 3: after second white on line (into operands) (flush white) | |
260 | 4: after putting out a .line, put out digits | |
261 | 5: parsing a string, then go to old-state | |
262 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
263 | 7: After putting out a .appfile, put out string. |
264 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 265 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 266 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a2a5a4fa KR |
267 | 11: After seeing a symbol character in state 0 (eg a label definition) |
268 | -1: output string in out_string and go to the state in old_state | |
269 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
a39116f1 | 270 | */ |
6efd877d | 271 | |
9a7d824a ILT |
272 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
273 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
274 | 120''. States 9 and 10 ensure that a space is never dropped in | |
275 | between characters which could appear in a identifier. Ian | |
a2a5a4fa KR |
276 | Taylor, ian@cygnus.com. |
277 | ||
278 | I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works | |
279 | correctly on the PA (and any other target where colons are optional). | |
280 | Jeff Law, law@cs.utah.edu. */ | |
281 | ||
282 | /* This is purely an optimization hack, and relies on gcc's inlining | |
283 | capability. */ | |
284 | #if defined (__GNUC__) && defined (__OPTIMIZE__) | |
285 | #define GET() (get == scrub_from_file ? scrub_from_file () : (*get) ()) | |
286 | #else | |
287 | #define GET() ((*get) ()) | |
288 | #endif | |
f6a91cc0 | 289 | |
6efd877d | 290 | register int ch, ch2 = 0; |
385ce433 | 291 | int not_cpp_line = 0; |
6efd877d KR |
292 | |
293 | switch (state) | |
294 | { | |
295 | case -1: | |
296 | ch = *out_string++; | |
297 | if (*out_string == 0) | |
298 | { | |
299 | state = old_state; | |
300 | old_state = 3; | |
301 | } | |
302 | return ch; | |
303 | ||
304 | case -2: | |
305 | for (;;) | |
306 | { | |
307 | do | |
308 | { | |
a2a5a4fa | 309 | ch = GET (); |
6efd877d KR |
310 | } |
311 | while (ch != EOF && ch != '\n' && ch != '*'); | |
312 | if (ch == '\n' || ch == EOF) | |
313 | return ch; | |
314 | ||
315 | /* At this point, ch must be a '*' */ | |
a2a5a4fa | 316 | while ((ch = GET ()) == '*') |
6efd877d KR |
317 | { |
318 | ; | |
319 | } | |
320 | if (ch == EOF || ch == '/') | |
321 | break; | |
322 | (*unget) (ch); | |
323 | } | |
324 | state = old_state; | |
325 | return ' '; | |
326 | ||
327 | case 4: | |
a2a5a4fa | 328 | ch = GET (); |
6efd877d KR |
329 | if (ch == EOF || (ch >= '0' && ch <= '9')) |
330 | return ch; | |
331 | else | |
332 | { | |
333 | while (ch != EOF && IS_WHITESPACE (ch)) | |
a2a5a4fa | 334 | ch = GET (); |
6efd877d KR |
335 | if (ch == '"') |
336 | { | |
337 | (*unget) (ch); | |
001581c7 | 338 | out_string = "\n\t.appfile "; |
6efd877d KR |
339 | old_state = 7; |
340 | state = -1; | |
341 | return *out_string++; | |
342 | } | |
343 | else | |
344 | { | |
345 | while (ch != EOF && ch != '\n') | |
a2a5a4fa | 346 | ch = GET (); |
58d4951d | 347 | state = 0; |
6efd877d KR |
348 | return ch; |
349 | } | |
350 | } | |
351 | ||
352 | case 5: | |
a2a5a4fa | 353 | ch = GET (); |
6efd877d KR |
354 | if (lex[ch] == LEX_IS_STRINGQUOTE) |
355 | { | |
356 | state = old_state; | |
357 | return ch; | |
358 | } | |
a2a5a4fa | 359 | #ifndef NO_STRING_ESCAPES |
6efd877d KR |
360 | else if (ch == '\\') |
361 | { | |
362 | state = 6; | |
363 | return ch; | |
364 | } | |
a2a5a4fa | 365 | #endif |
6efd877d KR |
366 | else if (ch == EOF) |
367 | { | |
368 | as_warn ("End of file in string: inserted '\"'"); | |
369 | state = old_state; | |
370 | (*unget) ('\n'); | |
371 | return '"'; | |
372 | } | |
373 | else | |
374 | { | |
375 | return ch; | |
376 | } | |
377 | ||
378 | case 6: | |
379 | state = 5; | |
a2a5a4fa | 380 | ch = GET (); |
6efd877d KR |
381 | switch (ch) |
382 | { | |
6d331d71 KR |
383 | /* Handle strings broken across lines, by turning '\n' into |
384 | '\\' and 'n'. */ | |
6efd877d KR |
385 | case '\n': |
386 | (*unget) ('n'); | |
387 | add_newlines++; | |
388 | return '\\'; | |
389 | ||
390 | case '"': | |
391 | case '\\': | |
392 | case 'b': | |
393 | case 'f': | |
394 | case 'n': | |
395 | case 'r': | |
396 | case 't': | |
6efd877d | 397 | case 'v': |
385ce433 JL |
398 | case 'x': |
399 | case 'X': | |
6efd877d KR |
400 | case '0': |
401 | case '1': | |
402 | case '2': | |
403 | case '3': | |
404 | case '4': | |
405 | case '5': | |
406 | case '6': | |
407 | case '7': | |
408 | break; | |
7c2d4011 | 409 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
6efd877d KR |
410 | default: |
411 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
412 | break; | |
fecd2382 | 413 | #else /* ONLY_STANDARD_ESCAPES */ |
6efd877d KR |
414 | default: |
415 | /* Accept \x as x for any x */ | |
416 | break; | |
fecd2382 | 417 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 418 | |
6efd877d KR |
419 | case EOF: |
420 | as_warn ("End of file in string: '\"' inserted"); | |
421 | return '"'; | |
422 | } | |
423 | return ch; | |
424 | ||
425 | case 7: | |
a2a5a4fa | 426 | ch = GET (); |
6efd877d KR |
427 | state = 5; |
428 | old_state = 8; | |
429 | return ch; | |
430 | ||
431 | case 8: | |
432 | do | |
a2a5a4fa | 433 | ch = GET (); |
6efd877d KR |
434 | while (ch != '\n'); |
435 | state = 0; | |
436 | return ch; | |
437 | } | |
438 | ||
a2a5a4fa | 439 | /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ |
6efd877d KR |
440 | |
441 | /* flushchar: */ | |
a2a5a4fa | 442 | ch = GET (); |
6efd877d KR |
443 | recycle: |
444 | if (ch == EOF) | |
445 | { | |
446 | if (state != 0) | |
a2a5a4fa KR |
447 | { |
448 | as_warn ("End of file not at end of a line: Newline inserted."); | |
449 | state = 0; | |
450 | return '\n'; | |
451 | } | |
6efd877d KR |
452 | return ch; |
453 | } | |
454 | ||
455 | switch (lex[ch]) | |
456 | { | |
457 | case LEX_IS_WHITESPACE: | |
458 | do | |
385ce433 JL |
459 | /* Preserve a single whitespace character at the beginning of |
460 | a line. */ | |
461 | if (state == 0) | |
462 | { | |
463 | state = 1; | |
464 | return ch; | |
465 | } | |
466 | else | |
a2a5a4fa | 467 | ch = GET (); |
6efd877d KR |
468 | while (ch != EOF && IS_WHITESPACE (ch)); |
469 | if (ch == EOF) | |
470 | return ch; | |
471 | ||
a2a5a4fa KR |
472 | if (IS_COMMENT (ch) |
473 | || (state == 0 && IS_LINE_COMMENT (ch)) | |
474 | || ch == '/' | |
475 | || IS_LINE_SEPARATOR (ch)) | |
6efd877d | 476 | { |
385ce433 JL |
477 | /* cpp never outputs a leading space before the #, so try to |
478 | avoid being confused. */ | |
479 | not_cpp_line = 1; | |
6efd877d | 480 | goto recycle; |
fecd2382 | 481 | } |
6efd877d | 482 | |
a2a5a4fa KR |
483 | /* If we're in state 2 or 11, we've seen a non-white character |
484 | followed by whitespace. If the next character is ':', this | |
7172e226 ILT |
485 | is whitespace after a label name which we normally must |
486 | ignore. In MRI mode, though, spaces are not permitted | |
487 | between the label and the colon. */ | |
488 | if ((state == 2 || state == 11) | |
489 | && lex[ch] == LEX_IS_COLON | |
490 | && ! flag_mri) | |
6efd877d | 491 | { |
a2a5a4fa | 492 | state = 1; |
6efd877d KR |
493 | return ch; |
494 | } | |
495 | ||
496 | switch (state) | |
497 | { | |
498 | case 0: | |
499 | state++; | |
500 | goto recycle; /* Punted leading sp */ | |
501 | case 1: | |
385ce433 JL |
502 | /* We can arrive here if we leave a leading whitespace character |
503 | at the beginning of a line. */ | |
504 | goto recycle; | |
6efd877d | 505 | case 2: |
f6a91cc0 | 506 | state = 3; |
6efd877d KR |
507 | (*unget) (ch); |
508 | return ' '; /* Sp after opco */ | |
509 | case 3: | |
510 | goto recycle; /* Sp in operands */ | |
9a7d824a ILT |
511 | case 9: |
512 | case 10: | |
513 | state = 10; /* Sp after symbol char */ | |
514 | goto recycle; | |
a2a5a4fa KR |
515 | case 11: |
516 | state = 1; | |
517 | (*unget) (ch); | |
518 | return ' '; /* Sp after label definition. */ | |
6efd877d KR |
519 | default: |
520 | BAD_CASE (state); | |
521 | } | |
522 | break; | |
523 | ||
524 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
a2a5a4fa | 525 | ch2 = GET (); |
6efd877d KR |
526 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) |
527 | { | |
528 | for (;;) | |
529 | { | |
530 | do | |
531 | { | |
a2a5a4fa | 532 | ch2 = GET (); |
6efd877d KR |
533 | if (ch2 != EOF && IS_NEWLINE (ch2)) |
534 | add_newlines++; | |
fecd2382 | 535 | } |
6efd877d KR |
536 | while (ch2 != EOF && |
537 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
538 | ||
539 | while (ch2 != EOF && | |
540 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
541 | { | |
a2a5a4fa | 542 | ch2 = GET (); |
fecd2382 | 543 | } |
6efd877d KR |
544 | |
545 | if (ch2 == EOF | |
546 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
fecd2382 | 547 | break; |
6efd877d KR |
548 | (*unget) (ch); |
549 | } | |
550 | if (ch2 == EOF) | |
551 | as_warn ("End of file in multiline comment"); | |
552 | ||
553 | ch = ' '; | |
554 | goto recycle; | |
555 | } | |
556 | else | |
557 | { | |
558 | if (ch2 != EOF) | |
559 | (*unget) (ch2); | |
9a7d824a ILT |
560 | if (state == 9 || state == 10) |
561 | state = 3; | |
6efd877d KR |
562 | return ch; |
563 | } | |
564 | break; | |
565 | ||
566 | case LEX_IS_STRINGQUOTE: | |
9821fda9 ILT |
567 | if (state == 10) |
568 | { | |
569 | /* Preserve the whitespace in foo "bar" */ | |
570 | (*unget) (ch); | |
571 | state = 3; | |
572 | return ' '; | |
573 | } | |
574 | else if (state == 9) | |
9a7d824a ILT |
575 | old_state = 3; |
576 | else | |
577 | old_state = state; | |
6efd877d KR |
578 | state = 5; |
579 | return ch; | |
a39116f1 | 580 | #ifndef IEEE_STYLE |
6efd877d | 581 | case LEX_IS_ONECHAR_QUOTE: |
9821fda9 ILT |
582 | if (state == 10) |
583 | { | |
584 | /* Preserve the whitespace in foo 'b' */ | |
585 | (*unget) (ch); | |
586 | state = 3; | |
587 | return ' '; | |
588 | } | |
a2a5a4fa | 589 | ch = GET (); |
6efd877d KR |
590 | if (ch == EOF) |
591 | { | |
592 | as_warn ("End-of-file after a one-character quote; \\000 inserted"); | |
593 | ch = 0; | |
594 | } | |
595 | if (ch == '\\') | |
596 | { | |
a2a5a4fa | 597 | ch = GET (); |
6efd877d KR |
598 | ch = process_escape (ch); |
599 | } | |
600 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
7c2d4011 | 601 | |
6efd877d | 602 | |
9a7d824a | 603 | /* None of these 'x constants for us. We want 'x'. */ |
a2a5a4fa | 604 | if ((ch = GET ()) != '\'') |
6efd877d | 605 | { |
fecd2382 | 606 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
6efd877d | 607 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 608 | #else |
6efd877d | 609 | (*unget) (ch); |
fecd2382 | 610 | #endif |
6efd877d KR |
611 | } |
612 | if (strlen (out_buf) == 1) | |
613 | { | |
614 | return out_buf[0]; | |
615 | } | |
9821fda9 | 616 | if (state == 9) |
9a7d824a ILT |
617 | old_state = 3; |
618 | else | |
619 | old_state = state; | |
6efd877d KR |
620 | state = -1; |
621 | out_string = out_buf; | |
622 | return *out_string++; | |
a39116f1 | 623 | #endif |
6efd877d | 624 | case LEX_IS_COLON: |
9a7d824a ILT |
625 | if (state == 9 || state == 10) |
626 | state = 3; | |
627 | else if (state != 3) | |
a2a5a4fa | 628 | state = 1; |
6efd877d KR |
629 | return ch; |
630 | ||
631 | case LEX_IS_NEWLINE: | |
632 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
633 | if (add_newlines) | |
634 | { | |
635 | --add_newlines; | |
636 | (*unget) (ch); | |
637 | } | |
638 | /* fall thru into... */ | |
639 | ||
640 | case LEX_IS_LINE_SEPARATOR: | |
641 | state = 0; | |
642 | return ch; | |
643 | ||
644 | case LEX_IS_LINE_COMMENT_START: | |
9a7d824a | 645 | if (state == 0) /* Only comment at start of line. */ |
6efd877d | 646 | { |
9a7d824a ILT |
647 | /* FIXME-someday: The two character comment stuff was badly |
648 | thought out. On i386, we want '/' as line comment start | |
649 | AND we want C style comments. hence this hack. The | |
650 | whole lexical process should be reworked. xoxorich. */ | |
651 | if (ch == '/') | |
f6a91cc0 | 652 | { |
a2a5a4fa | 653 | ch2 = GET (); |
9a7d824a ILT |
654 | if (ch2 == '*') |
655 | { | |
656 | state = -2; | |
657 | return (do_scrub_next_char (get, unget)); | |
658 | } | |
659 | else | |
660 | { | |
661 | (*unget) (ch2); | |
662 | } | |
663 | } /* bad hack */ | |
6efd877d | 664 | |
385ce433 JL |
665 | if (ch != '#') |
666 | not_cpp_line = 1; | |
667 | ||
9a7d824a | 668 | do |
a2a5a4fa | 669 | ch = GET (); |
9a7d824a | 670 | while (ch != EOF && IS_WHITESPACE (ch)); |
6efd877d | 671 | if (ch == EOF) |
9a7d824a ILT |
672 | { |
673 | as_warn ("EOF in comment: Newline inserted"); | |
674 | return '\n'; | |
675 | } | |
385ce433 | 676 | if (ch < '0' || ch > '9' || not_cpp_line) |
9a7d824a ILT |
677 | { |
678 | /* Non-numerics: Eat whole comment line */ | |
679 | while (ch != EOF && !IS_NEWLINE (ch)) | |
a2a5a4fa | 680 | ch = GET (); |
9a7d824a ILT |
681 | if (ch == EOF) |
682 | as_warn ("EOF in Comment: Newline inserted"); | |
683 | state = 0; | |
684 | return '\n'; | |
685 | } | |
686 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
687 | (*unget) (ch); | |
688 | old_state = 4; | |
689 | state = -1; | |
001581c7 | 690 | out_string = "\t.appline "; |
9a7d824a | 691 | return *out_string++; |
6efd877d | 692 | } |
6efd877d | 693 | |
9a7d824a ILT |
694 | /* We have a line comment character which is not at the start of |
695 | a line. If this is also a normal comment character, fall | |
696 | through. Otherwise treat it as a default character. */ | |
7172e226 ILT |
697 | if ((flag_mri && (ch == '!' || ch == '*')) |
698 | || strchr (comment_chars, ch) == NULL) | |
9a7d824a ILT |
699 | goto de_fault; |
700 | /* Fall through. */ | |
6efd877d KR |
701 | case LEX_IS_COMMENT_START: |
702 | do | |
a2a5a4fa | 703 | ch = GET (); |
6efd877d KR |
704 | while (ch != EOF && !IS_NEWLINE (ch)); |
705 | if (ch == EOF) | |
706 | as_warn ("EOF in comment: Newline inserted"); | |
707 | state = 0; | |
708 | return '\n'; | |
709 | ||
f6a91cc0 | 710 | case LEX_IS_SYMBOL_COMPONENT: |
9a7d824a ILT |
711 | if (state == 10) |
712 | { | |
713 | /* This is a symbol character following another symbol | |
714 | character, with whitespace in between. We skipped the | |
715 | whitespace earlier, so output it now. */ | |
716 | (*unget) (ch); | |
717 | state = 3; | |
718 | return ' '; | |
719 | } | |
f6a91cc0 ILT |
720 | if (state == 3) |
721 | state = 9; | |
722 | /* Fall through. */ | |
6efd877d KR |
723 | default: |
724 | de_fault: | |
725 | /* Some relatively `normal' character. */ | |
726 | if (state == 0) | |
727 | { | |
a2a5a4fa | 728 | state = 11; /* Now seeing label definition */ |
6efd877d | 729 | return ch; |
fecd2382 | 730 | } |
6efd877d KR |
731 | else if (state == 1) |
732 | { | |
733 | state = 2; /* Ditto */ | |
734 | return ch; | |
735 | } | |
f6a91cc0 ILT |
736 | else if (state == 9) |
737 | { | |
738 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
739 | state = 3; | |
740 | return ch; | |
741 | } | |
9a7d824a ILT |
742 | else if (state == 10) |
743 | { | |
744 | state = 3; | |
745 | return ch; | |
746 | } | |
6efd877d KR |
747 | else |
748 | { | |
749 | return ch; /* Opcode or operands already */ | |
750 | } | |
751 | } | |
752 | return -1; | |
a2a5a4fa KR |
753 | |
754 | #undef GET | |
fecd2382 RP |
755 | } |
756 | ||
#ifdef TEST

/* Stand-alone test driver: scrub stdin and copy the result to stdout.

   NOTE(review): do_scrub_begin () also reads flag_mri, which is not
   defined in this file; it presumably comes from as.h -- confirm that
   a TEST build can link it.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () walks this list as well; the test setup has no
   extra line separators.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* scrub_from_file () and scrub_to_file () operate on scrub_file.  */
  scrub_file = stdin;

  do_scrub_begin ();
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal stand-in for the assembler's warning routine: print STR
   followed by a newline on stderr.  Any '%' directives in STR are
   printed literally, not expanded.
   NOTE(review): the signature is left as it was; check it against the
   as_warn declaration in as.h, if there is one.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
779 | ||
fecd2382 | 780 | /* end of app.c */ |